diff --git a/dev/.nojekyll b/dev/.nojekyll new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/dev/.nojekyll @@ -0,0 +1 @@ + diff --git a/dev/404.html b/dev/404.html new file mode 100644 index 0000000000000000000000000000000000000000..14b9c784f902672c54527b59971839ce8c82b1ad --- /dev/null +++ b/dev/404.html @@ -0,0 +1,191 @@ + + + +
This outlines how to propose a change to torch. For more detailed info about contributing to this, and other tidyverse packages, please see the development contributing guide.
+You can fix typos, spelling mistakes, or grammatical errors in the documentation directly using the GitHub web interface, as long as the changes are made in the source file. This generally means you’ll need to edit roxygen2 comments in an .R, not a .Rd file. You can find the .R file that generates the .Rd by reading the comment in the first line.
See also the [Documentation] section.
If you find a bug in torch, please open an issue here. Please provide detailed information on how to reproduce the bug. It would be great to also provide a reprex.
Feel free to open issues here and add the feature-request tag. Try searching whether there’s already an open issue for your feature request; in that case it’s better to comment on or upvote it instead of opening a new one.
We welcome contributed examples. Feel free to open a PR with new examples. They should be placed in the vignettes/examples folder.
The examples should be an .R file and a .Rmd file with the same name that just renders the code.
+See mnist-mlp.R and mnist-mlp.Rmd
+One must be able to run the example without manually downloading any dataset/file. You should also add an entry to the _pkgdown.yaml file.
We have many open issues in the GitHub repo. If there’s an item you want to work on, you can comment on it and ask for directions.
+YEAR: 2020 +COPYRIGHT HOLDER: Daniel Falbel ++ +
Copyright (c) 2020 Daniel Falbel
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+dir <- "~/Downloads/mnist" + +ds <- mnist_dataset( + dir, + download = TRUE, + transform = function(x) { + x <- x$to(dtype = torch_float())/256 + x[newaxis,..] + } +) +dl <- dataloader(ds, batch_size = 32, shuffle = TRUE) + +net <- nn_module( + "Net", + initialize = function() { + self$conv1 <- nn_conv2d(1, 32, 3, 1) + self$conv2 <- nn_conv2d(32, 64, 3, 1) + self$dropout1 <- nn_dropout2d(0.25) + self$dropout2 <- nn_dropout2d(0.5) + self$fc1 <- nn_linear(9216, 128) + self$fc2 <- nn_linear(128, 10) + }, + forward = function(x) { + x <- self$conv1(x) + x <- nnf_relu(x) + x <- self$conv2(x) + x <- nnf_relu(x) + x <- nnf_max_pool2d(x, 2) + x <- self$dropout1(x) + x <- torch_flatten(x, start_dim = 2) + x <- self$fc1(x) + x <- nnf_relu(x) + x <- self$dropout2(x) + x <- self$fc2(x) + output <- nnf_log_softmax(x, dim=1) + output + } +) + +model <- net() +optimizer <- optim_sgd(model$parameters, lr = 0.01) + +epochs <- 10 + +for (epoch in 1:10) { + + pb <- progress::progress_bar$new( + total = length(dl), + format = "[:bar] :eta Loss: :loss" + ) + l <- c() + + for (b in enumerate(dl)) { + optimizer$zero_grad() + output <- model(b[[1]]) + loss <- nnf_nll_loss(output, b[[2]]) + loss$backward() + optimizer$step() + l <- c(l, loss$item()) + pb$tick(tokens = list(loss = mean(l))) + } + + cat(sprintf("Loss at epoch %d: %3f\n", epoch, mean(l))) +}
library(torch) + +dir <- "~/Downloads/mnist" + +ds <- mnist_dataset( + dir, + download = TRUE, + transform = function(x) { + x <- x$to(dtype = torch_float())/256 + x <- 2*(x - 0.5) + x[newaxis,..] + } +) +dl <- dataloader(ds, batch_size = 32, shuffle = TRUE) + +generator <- nn_module( + "generator", + initialize = function(latent_dim, out_channels) { + self$main <- nn_sequential( + nn_conv_transpose2d(latent_dim, 512, kernel_size = 4, + stride = 1, padding = 0, bias = FALSE), + nn_batch_norm2d(512), + nn_relu(), + nn_conv_transpose2d(512, 256, kernel_size = 4, + stride = 2, padding = 1, bias = FALSE), + nn_batch_norm2d(256), + nn_relu(), + nn_conv_transpose2d(256, 128, kernel_size = 4, + stride = 2, padding = 1, bias = FALSE), + nn_batch_norm2d(128), + nn_relu(), + nn_conv_transpose2d(128, out_channels, kernel_size = 4, + stride = 2, padding = 3, bias = FALSE), + nn_tanh() + ) + }, + forward = function(input) { + self$main(input) + } +) + +discriminator <- nn_module( + "discriminator", + initialize = function(in_channels) { + self$main <- nn_sequential( + nn_conv2d(in_channels, 16, kernel_size = 4, stride = 2, padding = 1, bias = FALSE), + nn_leaky_relu(0.2, inplace = TRUE), + nn_conv2d(16, 32, kernel_size = 4, stride = 2, padding = 1, bias = FALSE), + nn_batch_norm2d(32), + nn_leaky_relu(0.2, inplace = TRUE), + nn_conv2d(32, 64, kernel_size = 4, stride = 2, padding = 1, bias = FALSE), + nn_batch_norm2d(64), + nn_leaky_relu(0.2, inplace = TRUE), + nn_conv2d(64, 128, kernel_size = 4, stride = 2, padding = 1, bias = FALSE), + nn_leaky_relu(0.2, inplace = TRUE) + ) + self$linear <- nn_linear(128, 1) + self$sigmoid <- nn_sigmoid() + }, + forward = function(input) { + x <- self$main(input) + x <- torch_flatten(x, start_dim = 2) + x <- self$linear(x) + self$sigmoid(x) + } +) + +plot_gen <- function(noise) { + img <- G(noise) + img <- img$cpu() + img <- img[1,1,,,newaxis]/2 + 0.5 + img <- torch_stack(list(img, img, img), dim = 2)[..,1] + img <- as.raster(as_array(img)) + plot(img) +} + +device <- torch_device(ifelse(cuda_is_available(), "cuda", "cpu")) + +G <- generator(latent_dim = 100, out_channels = 1) +D <- discriminator(in_channels = 1) + +init_weights <- function(m) { + if (grepl("conv", m$.classes[[1]])) { + nn_init_normal_(m$weight$data(), 0.0, 0.02) + } else if (grepl("batch_norm", m$.classes[[1]])) { + nn_init_normal_(m$weight$data(), 1.0, 0.02) + nn_init_constant_(m$bias$data(), 0) + } +} + +G[[1]]$apply(init_weights) +D[[1]]$apply(init_weights) + +G$to(device = device) +D$to(device = device) + +G_optimizer <- optim_adam(G$parameters, lr = 2 * 1e-4, betas = c(0.5, 0.999)) +D_optimizer <- optim_adam(D$parameters, lr = 2 * 1e-4, betas = c(0.5, 0.999)) + +fixed_noise <- torch_randn(1, 100, 1, 1, device = device) + +loss <- nn_bce_loss() + +for (epoch in 1:10) { + + pb <- progress::progress_bar$new( + total = length(dl), + format = "[:bar] :eta Loss D: :lossd Loss G: :lossg" + ) + lossg <- c() + lossd <- c() + + for (b in enumerate(dl)) { + + y_real <- torch_ones(32, device = device) + y_fake <- torch_zeros(32, device = device) + + noise <- torch_randn(32, 100, 1, 1, device = device) + fake <- G(noise) + + img <- b[[1]]$to(device = device) + + # train the discriminator --- + D_loss <- loss(D(img), y_real) + loss(D(fake$detach()), y_fake) + + D_optimizer$zero_grad() + D_loss$backward() + D_optimizer$step() + + # train the generator --- + + G_loss <- loss(D(fake), y_real) + + G_optimizer$zero_grad() + G_loss$backward() + G_optimizer$step() + + lossd <- c(lossd, D_loss$item()) + lossg <- 
c(lossg, G_loss$item()) + pb$tick(tokens = list(lossd = mean(lossd), lossg = mean(lossg))) + } + plot_gen(fixed_noise) + + cat(sprintf("Epoch %d - Loss D: %3f Loss G: %3f\n", epoch, mean(lossd), mean(lossg))) +}
dir <- "~/Downloads/mnist" + +ds <- mnist_dataset( + dir, + download = TRUE, + transform = function(x) { + x$to(dtype = torch_float())/256 + } +) +dl <- dataloader(ds, batch_size = 32, shuffle = TRUE) + +net <- nn_module( + "Net", + initialize = function() { + self$fc1 <- nn_linear(784, 128) + self$fc2 <- nn_linear(128, 10) + }, + forward = function(x) { + x %>% + torch_flatten(start_dim = 2) %>% + self$fc1() %>% + nnf_relu() %>% + self$fc2() %>% + nnf_log_softmax(dim = 1) + } +) + +model <- net() +optimizer <- optim_sgd(model$parameters, lr = 0.01) + +epochs <- 10 + +for (epoch in 1:10) { + + pb <- progress::progress_bar$new( + total = length(dl), + format = "[:bar] :eta Loss: :loss" + ) + l <- c() + + for (b in enumerate(dl)) { + optimizer$zero_grad() + output <- model(b[[1]]) + loss <- nnf_nll_loss(output, b[[2]]) + loss$backward() + optimizer$step() + l <- c(l, loss$item()) + pb$tick(tokens = list(loss = mean(l))) + } + + cat(sprintf("Loss at epoch %d: %3f\n", epoch, mean(l))) +}
library(torch)
Adding operations to autograd requires implementing a new autograd_function for each operation. Recall that autograd_functions are what autograd uses to compute the results and gradients, and encode the operation history. Every new function requires you to implement two methods:
forward() - the code that performs the operation. It can take as many arguments as you want, with some of them being optional, if you specify the default values. All kinds of R objects are accepted here. Tensor arguments that track history (i.e., with requires_grad=TRUE) will be converted to ones that don’t track history before the call, and their use will be registered in the graph. Note that this logic won’t traverse lists or any other data structures and will only consider Tensor’s that are direct arguments to the call. You can return either a single Tensor output, or a list of Tensors if there are multiple outputs. Also, please refer to the docs of autograd_function to find descriptions of useful methods that can be called only from forward().
backward() - gradient formula. It will be given as many Tensor arguments as there were outputs, with each of them representing the gradient w.r.t. that output. It should return as many Tensors as there were inputs that required gradients in forward(), with each of them containing the gradient w.r.t. its corresponding input.
It’s the user’s responsibility to use the special functions in the forward’s ctx properly in order to ensure that the new autograd_function works properly with the autograd engine.
save_for_backward() must be used when saving input or output of forward() to be used later in backward().
mark_dirty() must be used to mark any input that is modified in-place by the forward function.
mark_non_differentiable() must be used to tell the engine if an output is not differentiable.
Below you can find code for a linear function:
+linear <- autograd_function( + forward = function(ctx, input, weight, bias = NULL) { + ctx$save_for_backward(input = input, weight = weight, bias = bias) + output <- input$mm(weight$t()) + if (!is.null(bias)) + output <- output + bias$unsqueeze(0)$expand_as(output) + + output + }, + backward = function(ctx, grad_output) { + + s <- ctx$saved_variables + + grads <- list( + input = NULL, + weight = NULL, + bias = NULL + ) + + if (ctx$needs_input_grad$input) + grads$input <- grad_output$mm(s$weight) + + if (ctx$needs_input_grad$weight) + grads$weight <- grad_output$t()$mm(s$input) + + if (!is.null(s$bias) && ctx$needs_input_grad$bias) + grads$bias <- grad_output$sum(dim = 0) + + grads + } +)
Here, we give an additional example of a function that is parametrized by non-Tensor arguments:
+mul_constant <- autograd_function( + forward = function(ctx, tensor, constant) { + ctx$save_for_backward(constant = constant) + tensor * constant + }, + backward = function(ctx, grad_output) { + v <- ctx$saved_variables + list( + tensor = grad_output * v$constant + ) + } +)
x <- torch_tensor(1, requires_grad = TRUE) +o <- mul_constant(x, 2) +o$backward() +x$grad
library(torch)
In this article we describe the indexing operator for torch tensors and how it compares to the R indexing operator for arrays.
+Torch’s indexing semantics are closer to numpy’s semantics than R’s. You will find a lot of similarities between this article and the numpy indexing article available here.
Single element indexing for a 1-D tensor works mostly as expected. Like R, it is 1-based. Unlike R, though, it accepts negative indices for indexing from the end of the array. (In R, negative indices are used to remove elements.)
x <- torch_tensor(1:10)
x[1]
x[-1]
You can also subset matrices and higher-dimensional arrays using the same syntax:
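For instance, a minimal sketch (the matrix values here are purely illustrative):

x <- torch_tensor(matrix(1:12, ncol = 3, byrow = TRUE))
x[1, 1] # first row, first column
x[2, 3] # second row, third column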
Note that if one indexes a multidimensional tensor with fewer indices than dimensions, one gets an error, unlike in R, which would flatten the array. For example:
+x[1]
It is possible to slice and stride arrays to extract sub-arrays of the same number of dimensions, but of different sizes than the original. This is best illustrated by a few examples:
x <- torch_tensor(1:10)
x
x[2:5]
x[1:(-7)]
You can also use the 1:10:2 syntax which means: In the range from 1 to 10, take every second item. For example:
x[1:5:2]
Another special syntax is the N, meaning the size of the specified dimension.
x[5:N]
Like in R, you can take all elements in a dimension by leaving an index empty.
+Consider a matrix:
+x <- torch_randn(2, 3) +x
The following syntax will give you the first row:
+x[1,]
And this would give you the first 2 columns:
+x[,1:2]
By default, when indexing by a single integer, this dimension will be dropped to avoid the singleton dimension:
+x <- torch_randn(2, 3) +x[1,]$shape
You can optionally use the drop = FALSE argument to avoid dropping the dimension.
x[1,,drop = FALSE]$shape
It’s possible to add a new dimension to a tensor using index-like syntax:
x <- torch_tensor(c(10))
x$shape
x[, newaxis]$shape
x[, newaxis, newaxis]$shape
You can also use NULL instead of newaxis:
x[,NULL]$shape
Sometimes we don’t know how many dimensions a tensor has, but we do know what to do with the last available dimension, or the first one. To subsume all others, we can use ..:
z <- torch_tensor(1:125)$reshape(c(5,5,5)) +z[1,..] +z[..,1]
library(torch)
Central to data ingestion and preprocessing are datasets and data loaders.
+torch comes equipped with a bag of datasets related to, mostly, image recognition and natural language processing (e.g., mnist_dataset()), which can be iterated over by means of dataloaders:
# ...
+ds <- mnist_dataset(
+ dir,
+ download = TRUE,
+ transform = function(x) {
+ x <- x$to(dtype = torch_float())/256
+ x[newaxis,..]
+ }
+)
+
+dl <- dataloader(ds, batch_size = 32, shuffle = TRUE)
+
+for (b in enumerate(dl)) {
+ # ...
+}

Cf. vignettes/examples/mnist-cnn.R for a complete example.
What if you want to train on a different dataset? In these cases, you subclass Dataset, an abstract container that needs to know how to iterate over the given data. To that purpose, your subclass needs to implement .getitem(), and say what should be returned when the data loader is asking for the next batch.
In .getitem(), you can implement whatever preprocessing you require. Additionally, you should implement .length(), so users can find out how many items there are in the dataset.
While this may sound complicated, it is not at all. The base logic is straightforward – complexity will, naturally, correlate with how involved your preprocessing is. To provide you with a simple but functional prototype, here we show how to create your own dataset to train on Allison Horst's penguins.
Datasets are R6 classes created using the dataset() constructor. You can pass a name and various member functions. Among those should be initialize(), to create instance variables, .getitem(), to indicate how the data should be returned, and .length(), to say how many items we have.
In addition, any number of helper functions can be defined.
Here, we assume the penguins have already been loaded, and all preprocessing consists of removing rows with NA values, transforming factors to numbers starting from 0, and converting from R data types to torch tensors.
In .getitem, we essentially decide how this data is going to be used: All variables besides species go into x, the predictor, and species will constitute y, the target. Predictor and target are returned in a list, to be accessed as batch[[1]] and batch[[2]] during training.
penguins_dataset <- dataset( + + name = "penguins_dataset", + + initialize = function() { + self$data <- self$prepare_penguin_data() + }, + + .getitem = function(index) { + + x <- self$data[index, 2:-1] + y <- self$data[index, 1]$to(torch_long()) + + list(x, y) + }, + + .length = function() { + self$data$size()[[1]] + }, + + prepare_penguin_data = function() { + + input <- na.omit(penguins) + # conveniently, the categorical data are already factors + input$species <- as.numeric(input$species) + input$island <- as.numeric(input$island) + input$sex <- as.numeric(input$sex) + + input <- as.matrix(input) + torch_tensor(input) + } +)
Let’s create the dataset, query its length, and look at its first item:
tuxes <- penguins_dataset()
tuxes$.length()
tuxes$.getitem(1)
To be able to iterate over tuxes, we need a data loader (we override the default batch size of 1):
dl <- tuxes %>% dataloader(batch_size = 8)
Calling .length() on a data loader (as opposed to a dataset) will return the number of batches we have:
dl$.length()
And we can create an iterator to inspect the first batch:
iter <- dl$.iter()
b <- iter$.next()
b
To train a network, we can use enumerate to iterate over batches.
Our example network is very simple. (In reality, we would want to treat island as the categorical variable it is, and either one-hot-encode or embed it.)
net <- nn_module( + "PenguinNet", + initialize = function() { + self$fc1 <- nn_linear(6, 32) + self$fc2 <- nn_linear(32, 3) + }, + forward = function(x) { + x %>% + self$fc1() %>% + nnf_relu() %>% + self$fc2() %>% + nnf_log_softmax(dim = 1) + } +) + +model <- net()
We still need an optimizer:
+optimizer <- optim_sgd(model$parameters, lr = 0.01)
And we’re ready to train:
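The loop below is a minimal sketch that mirrors the MNIST training loops shown earlier in this document; the epoch count is an illustrative choice.

for (epoch in 1:10) {

  l <- c()

  for (b in enumerate(dl)) {
    optimizer$zero_grad()
    output <- model(b[[1]])
    loss <- nnf_nll_loss(output, b[[2]])
    loss$backward()
    optimizer$step()
    l <- c(l, loss$item())
  }

  cat(sprintf("Loss at epoch %d: %3f\n", epoch, mean(l)))
}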
library(torch)
In this article we describe various ways of creating torch tensors in R.
You can create tensors from R objects using the torch_tensor function. The torch_tensor function takes an R vector, matrix or array and creates an equivalent torch_tensor.
You can see a few examples below:
torch_tensor(c(1, 2, 3))

# conform to row-major indexing used in torch
torch_tensor(matrix(1:10, ncol = 5, nrow = 2, byrow = TRUE))
torch_tensor(array(runif(12), dim = c(2, 2, 3)))
By default, we will create tensors in the cpu device, converting their R datatype to the corresponding torch dtype.
++Note currently, only numeric and boolean types are supported.
+
You can always modify dtype and device when converting an R object to a torch tensor. For example:
torch_tensor(1, dtype = torch_long())
torch_tensor(1, device = "cpu", dtype = torch_float64())
Other options available when creating a tensor are:
requires_grad: boolean indicating if you want autograd to record operations on the tensor for automatic differentiation.
pin_memory: if set, the returned tensor will be allocated in pinned memory. Works only for CPU tensors.

These options are available for all functions that can be used to create new tensors, including the factory functions listed in the next section.
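For example, a minimal sketch:

x <- torch_tensor(c(1, 2, 3), requires_grad = TRUE)
x$requires_grad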
+You can also use the torch_* functions listed below to create torch tensors using some algorithm.
For example, the torch_randn function will create tensors using the normal distribution with mean 0 and standard deviation 1. You can use the ... argument to pass the size of the dimensions. For example, the code below will create a normally distributed tensor with shape 5x3.
x <- torch_randn(5, 3) +x
Another example is torch_ones, which creates a tensor filled with ones.
x <- torch_ones(2, 4, dtype = torch_int64(), device = "cpu") +x
Here is the full list of functions that can be used to bulk-create tensors in torch:
+torch_arange: Returns a tensor with a sequence of integers,torch_empty: Returns a tensor with uninitialized values,torch_eye: Returns an identity matrix,torch_full: Returns a tensor filled with a single value,torch_linspace: Returns a tensor with values linearly spaced in some interval,torch_logspace: Returns a tensor with values logarithmically spaced in some interval,torch_ones: Returns a tensor filled with all ones,torch_rand: Returns a tensor filled with values drawn from a uniform distribution on [0, 1).torch_randint: Returns a tensor with integers randomly drawn from an interval,torch_randn: Returns a tensor filled with values drawn from a unit normal distribution,torch_randperm: Returns a tensor filled with a random permutation of integers in some interval,torch_zeros: Returns a tensor filled with all zeros.Once a tensor exists you can convert between dtypes and move to a different device with to method. For example:
x <- torch_tensor(1)
y <- x$to(dtype = torch_int32())
x
y
You can also copy a tensor to the GPU using:
x <- torch_tensor(1)
y <- x$cuda()
library(torch)
So far, all we’ve been using from torch is tensors, but we’ve been performing all calculations ourselves – computing the predictions, the loss, the gradients (and thus, the necessary updates to the weights), and the new weight values. In this chapter, we’ll make a significant change: namely, we spare ourselves the cumbersome calculation of gradients, and have torch do it for us.
+Before we see that in action, let’s get some more background.
+Torch uses a module called autograd to record operations performed on tensors, and store what has to be done to obtain the respective gradients. These actions are stored as functions, and those functions are applied in order when the gradient of the output (normally, the loss) with respect to those tensors is calculated: starting from the output node and propagating gradients back through the network. This is a form of reverse mode automatic differentiation.
+As users, we can see a bit of this implementation. As a prerequisite for this “recording” to happen, tensors have to be created with requires_grad = TRUE. E.g.
x <- torch_ones(2,2, requires_grad = TRUE)
To be clear, this is a tensor with respect to which gradients have to be calculated – normally, a tensor representing a weight or a bias, not the input data 1. If we now perform some operation on that tensor, assigning the result to y
y <- x$mean()
we find that y now has a non-empty grad_fn that tells torch how to compute the gradient of y with respect to x:
y$grad_fn
Actual computation of gradients is triggered by calling backward() on the output tensor.
y$backward()
With that executed, x now has a non-empty field grad that stores the gradient of y with respect to x:
x$grad
With a longer chain of computations, we can peek at how torch builds up a graph of backward operations.
+Here is a slightly more complex example. We call retain_grad() on y and z just for demonstration purposes; by default, intermediate gradients – while of course they have to be computed – aren’t stored, in order to save memory.
x1 <- torch_ones(2,2, requires_grad = TRUE) +x2 <- torch_tensor(1.1, requires_grad = TRUE) +y <- x1 * (x2 + 2) +y$retain_grad() +z <- y$pow(2) * 3 +z$retain_grad() +out <- z$mean()
Starting from out$grad_fn, we can follow the graph all the way back to the leaf nodes:
# how to compute the gradient for mean, the last operation executed +out$grad_fn +# how to compute the gradient for the multiplication by 3 in z = y$pow(2) * 3 +out$grad_fn$next_functions +# how to compute the gradient for pow in z = y.pow(2) * 3 +out$grad_fn$next_functions[[1]]$next_functions +# how to compute the gradient for the multiplication in y = x * (x + 2) +out$grad_fn$next_functions[[1]]$next_functions[[1]]$next_functions +# how to compute the gradient for the two branches of y = x * (x + 2), +# where the left branch is a leaf node (AccumulateGrad for x1) +out$grad_fn$next_functions[[1]]$next_functions[[1]]$next_functions[[1]]$next_functions +# here we arrive at the other leaf node (AccumulateGrad for x2) +out$grad_fn$next_functions[[1]]$next_functions[[1]]$next_functions[[1]]$next_functions[[2]]$next_functions
After calling out$backward(), all tensors in the graph will have their respective gradients created. Without our calls to retain_grad above, z$grad and y$grad would be empty:
out$backward()
z$grad
y$grad
x2$grad
x1$grad
Thus acquainted with autograd, we’re ready to modify our example.
In exchange for a single new line calling loss$backward(), a number of lines that did manual backprop are now gone:
### generate training data ----------------------------------------------------- +# input dimensionality (number of input features) +d_in <- 3 +# output dimensionality (number of predicted features) +d_out <- 1 +# number of observations in training set +n <- 100 +# create random data +x <- torch_randn(n, d_in) +y <- x[,1]*0.2 - x[..,2]*1.3 - x[..,3]*0.5 + torch_randn(n) +y <- y$unsqueeze(dim = 1) +### initialize weights --------------------------------------------------------- +# dimensionality of hidden layer +d_hidden <- 32 +# weights connecting input to hidden layer +w1 <- torch_randn(d_in, d_hidden, requires_grad = TRUE) +# weights connecting hidden to output layer +w2 <- torch_randn(d_hidden, d_out, requires_grad = TRUE) +# hidden layer bias +b1 <- torch_zeros(1, d_hidden, requires_grad = TRUE) +# output layer bias +b2 <- torch_zeros(1, d_out,requires_grad = TRUE) +### network parameters --------------------------------------------------------- +learning_rate <- 1e-4 +### training loop -------------------------------------------------------------- +for (t in 1:200) { + + ### -------- Forward pass -------- + y_pred <- x$mm(w1)$add(b1)$clamp(min = 0)$mm(w2)$add(b2) + ### -------- compute loss -------- + loss <- (y_pred - y)$pow(2)$mean() + if (t %% 10 == 0) cat(t, as_array(loss), "\n") + ### -------- Backpropagation -------- + # compute the gradient of loss with respect to all tensors with requires_grad = True. + loss$backward() + + ### -------- Update weights -------- + + # Wrap in torch.no_grad() because this is a part we DON'T want to record for automatic gradient computation + with_no_grad({ + + w1$sub_(learning_rate * w1$grad) + w2$sub_(learning_rate * w2$grad) + b1$sub_(learning_rate * b1$grad) + b2$sub_(learning_rate * b2$grad) + + # Zero the gradients after every pass, because they'd accumulate otherwise + w1$grad$zero_() + w2$grad$zero_() + b1$grad$zero_() + b2$grad$zero_() + + }) + +}
We still manually compute the forward pass, and we still manually update the weights. In the last two chapters of this section, we’ll see how these parts of the logic can be made more modular and reusable, as well.
Unless we want to change the data, as in adversarial example generation.
Run:
+remotes::install_github("mlverse/torch")
On the first package load, additional software will be installed.
Currently this package is only a proof of concept: you can create a torch tensor from an R object, and convert it back from a torch tensor to an R object.
+library(torch) +x <- array(runif(8), dim = c(2, 2, 2)) +y <- torch_tensor(x, dtype = torch_float64()) +y +#> torch_tensor +#> (1,.,.) = +#> 0.8687 0.0157 +#> 0.4237 0.8971 +#> +#> (2,.,.) = +#> 0.4021 0.5509 +#> 0.3374 0.9034 +#> [ CPUDoubleType{2,2,2} ] +identical(x, as_array(y)) +#> [1] TRUE
In the following snippet we let torch, using the autograd feature, calculate the derivatives:
+x <- torch_tensor(1, requires_grad = TRUE) +w <- torch_tensor(2, requires_grad = TRUE) +b <- torch_tensor(3, requires_grad = TRUE) +y <- w * x + b +y$backward() +x$grad +#> torch_tensor +#> 2 +#> [ CPUFloatType{1} ] +w$grad +#> torch_tensor +#> 1 +#> [ CPUFloatType{1} ] +b$grad +#> torch_tensor +#> 1 +#> [ CPUFloatType{1} ]
In the following example we are going to fit a linear regression from scratch using torch’s Autograd.
Note that all methods ending in _ (e.g. sub_) modify the tensors in place.
x <- torch_randn(100, 2) +y <- 0.1 + 0.5*x[,1] - 0.7*x[,2] + +w <- torch_randn(2, 1, requires_grad = TRUE) +b <- torch_zeros(1, requires_grad = TRUE) + +lr <- 0.5 +for (i in 1:100) { + y_hat <- torch_mm(x, w) + b + loss <- torch_mean((y - y_hat$squeeze(1))^2) + + loss$backward() + + with_no_grad({ + w$sub_(w$grad*lr) + b$sub_(b$grad*lr) + + w$grad$zero_() + b$grad$zero_() + }) +} +print(w) +#> torch_tensor +#> 0.5000 +#> -0.7000 +#> [ CPUFloatType{2,1} ] +print(b) +#> torch_tensor +#> 0.01 * +#> 10.0000 +#> [ CPUFloatType{1} ]
Class representing the context.
ptr: (Dev related) pointer to the context C++ object.
needs_input_grad: boolean listing arguments of forward and whether they require_grad.
saved_variables: list of objects that were saved for backward via save_for_backward.
new(): (Dev related) Initializes the context. Not user related.
AutogradContext$new( + ptr, + env, + argument_names = NULL, + argument_needs_grad = NULL +)
ptr: pointer to the C++ object
env: environment that encloses both forward and backward
argument_names: names of forward arguments
argument_needs_grad: whether each argument in forward needs grad.
save_for_backward(): Saves given objects for a future call to backward(). This should be called at most once, and only from inside the forward() method. Later, saved objects can be accessed through the saved_variables attribute. Before returning them to the user, a check is made to ensure they weren’t used in any in-place operation that modified their content. Arguments can also be any kind of R object.
AutogradContext$save_for_backward(...)
...: any kind of R object that will be saved for the backward pass. It's common to pass named arguments.
mark_non_differentiable(): Marks outputs as non-differentiable. This should be called at most once, only from inside the forward() method, and all arguments should be outputs. This will mark outputs as not requiring gradients, increasing the efficiency of backward computation. You still need to accept a gradient for each output in backward(), but it’s always going to be a zero tensor with the same shape as the corresponding output. This is used e.g. for indices returned from a max Function.
AutogradContext$mark_non_differentiable(...)
...non-differentiable outputs.
mark_dirty(): Marks given tensors as modified in an in-place operation. This should be called at most once, only from inside the forward() method, and all arguments should be inputs. Every tensor that’s been modified in-place in a call to forward() should be given to this function, to ensure correctness of our checks. It doesn’t matter whether the function is called before or after modification.
AutogradContext$mark_dirty(...)
...tensors that are modified in-place.
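As an illustration only (a hypothetical sketch, not taken from the package documentation), an autograd_function that modifies its input in-place and registers that fact with mark_dirty() could look like this:

add_one_ <- autograd_function(
  forward = function(ctx, x) {
    x$add_(1)          # modify the input in-place
    ctx$mark_dirty(x)  # tell autograd that x was modified in-place
    x
  },
  backward = function(ctx, grad_output) {
    # d(x + 1)/dx = 1, so the gradient passes through unchanged
    list(x = grad_output)
  }
)

As in PyTorch, a leaf tensor that requires grad cannot be modified in-place, so such a function should only be applied to intermediate tensors.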
clone(): The objects of this class are cloneable with this method.
AutogradContext$clone(deep = FALSE)
deepWhether to make a deep clone.
R/autograd.R
+ autograd_backward.RdThe graph is differentiated using the chain rule. If any of tensors are
+non-scalar (i.e. their data has more than one element) and require gradient,
+then the Jacobian-vector product would be computed, in this case the function
+additionally requires specifying grad_tensors. It should be a sequence of
+matching length, that contains the “vector” in the Jacobian-vector product,
+usually the gradient of the differentiated function w.r.t. corresponding
+tensors (None is an acceptable value for all tensors that don’t need gradient
+tensors).
autograd_backward( + tensors, + grad_tensors = NULL, + retain_graph = create_graph, + create_graph = FALSE +)+ +
| tensors | +(list of Tensor) – Tensors of which the derivative will +be computed. |
+
|---|---|
| grad_tensors | +(list of (Tensor or |
+
| retain_graph | +(bool, optional) – If |
+
| create_graph | +(bool, optional) – If |
+
This function accumulates gradients in the leaves - you might need to zero +them before calling it.
+ ++if (torch_is_installed()) { +x <- torch_tensor(1, requires_grad = TRUE) +y <- 2 * x + +a <- torch_tensor(1, requires_grad = TRUE) +b <- 3 * a + +autograd_backward(list(y, b)) + +}
R/autograd.R
autograd_function.Rd

Every operation performed on tensors creates a new function object, that performs the computation, and records that it happened. The history is retained in the form of a DAG of functions, with edges denoting data dependencies (input <- output). Then, when backward is called, the graph is processed in the topological ordering, by calling backward() methods of each Function object, and passing returned gradients on to the next Functions.
autograd_function(forward, backward)+ +
| forward | +Performs the operation. It must accept a context |
+
|---|---|
| backward | +Defines a formula for differentiating the operation. It must accept
+a context |
+
+if (torch_is_installed()) { + +exp2 <- autograd_function( + forward = function(ctx, i) { + result <- i$exp() + ctx$save_for_backward(result = result) + result + }, + backward = function(ctx, grad_output) { + list(i = grad_output * ctx$saved_variable$result) + } +) + +}
R/autograd.R
autograd_grad.Rd

grad_outputs should be a list of length matching output containing the “vector” in the Jacobian-vector product, usually the pre-computed gradients w.r.t. each of the outputs. If an output doesn’t require_grad, then the gradient can be None.
autograd_grad( + outputs, + inputs, + grad_outputs = NULL, + retain_graph = create_graph, + create_graph = FALSE, + allow_unused = FALSE +)+ +
| outputs | +(sequence of Tensor) – outputs of the differentiated function. |
+
|---|---|
| inputs | +(sequence of Tensor) – Inputs w.r.t. which the gradient will be +returned (and not accumulated into .grad). |
+
| grad_outputs | +(sequence of Tensor) – The “vector” in the Jacobian-vector
+product. Usually gradients w.r.t. each output. None values can be specified for
+scalar Tensors or ones that don’t require grad. If a None value would be acceptable
+for all |
+
| retain_graph | +(bool, optional) – If |
+
| create_graph | +(bool, optional) – If |
+
| allow_unused | +(bool, optional) – If |
+
If only_inputs is TRUE, the function will only return a list of gradients w.r.t
+the specified inputs. If it’s FALSE, then gradient w.r.t. all remaining leaves
+will still be computed, and will be accumulated into their .grad attribute.
+if (torch_is_installed()) { +w <- torch_tensor(0.5, requires_grad = TRUE) +b <- torch_tensor(0.9, requires_grad = TRUE) +x <- torch_tensor(runif(100)) +y <- 2 * x + 1 +loss <- (y - (w*x + b))^2 +loss <- loss$mean() + +o <- autograd_grad(loss, list(w, b)) +o + +}
Returns the index of a currently selected device.
+cuda_current_device()
+ Returns the number of GPUs available.
+cuda_device_count()
+ R/cuda.R
+ cuda_is_available.RdReturns a bool indicating if CUDA is currently available.
+cuda_is_available()
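A typical use, matching the device-selection pattern in the examples earlier in this document:

device <- torch_device(ifelse(cuda_is_available(), "cuda", "cpu"))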
+ R/utils-data-dataloader.R
+ dataloader.RdData loader. Combines a dataset and a sampler, and provides +single- or multi-process iterators over the dataset.
+dataloader( + dataset, + batch_size = 1, + shuffle = FALSE, + sampler = NULL, + batch_sampler = NULL, + num_workers = 0, + collate_fn = NULL, + pin_memory = FALSE, + drop_last = FALSE, + timeout = 0, + worker_init_fn = NULL +)+ +
| dataset | +(Dataset): dataset from which to load the data. |
+
|---|---|
| batch_size | +(int, optional): how many samples per batch to load
+(default: |
+
| shuffle | +(bool, optional): set to |
+
| sampler | +(Sampler, optional): defines the strategy to draw samples from
+the dataset. If specified, |
+
| batch_sampler | +(Sampler, optional): like sampler, but returns a batch of
+indices at a time. Mutually exclusive with |
+
| num_workers | +(int, optional): how many subprocesses to use for data
+loading. 0 means that the data will be loaded in the main process.
+(default: |
+
| collate_fn | +(callable, optional): merges a list of samples to form a mini-batch. |
+
| pin_memory | +(bool, optional): If |
+
| drop_last | +(bool, optional): set to |
+
| timeout | +(numeric, optional): if positive, the timeout value for collecting a batch
+from workers. Should always be non-negative. (default: |
+
| worker_init_fn | +(callable, optional): If not |
+
R/utils-data-dataloader.R
+ dataloader_make_iter.RdCreates an iterator from a DataLoader
+dataloader_make_iter(dataloader)+ +
| dataloader | +a dataloader object. |
+
|---|
R/utils-data-dataloader.R
+ dataloader_next.RdGet the next element of a dataloader iterator
+dataloader_next(iter)+ +
| iter | +a DataLoader iter created with dataloader_make_iter. |
+
|---|
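A minimal sketch of the iterator workflow, assuming a small tensor_dataset purely for illustration:

ds <- tensor_dataset(x = torch_randn(10, 3), y = torch_randn(10))
dl <- dataloader(ds, batch_size = 5)

iter <- dataloader_make_iter(dl)
batch <- dataloader_next(iter)
batch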
Dataset.

All datasets that represent a map from keys to data samples should subclass it. All subclasses should overwrite get_item, supporting fetching a data sample for a given key. Subclasses could also optionally overwrite length, which is expected to return the size of the dataset by many ~torch.utils.data.Sampler implementations and the default options of ~torch.utils.data.DataLoader.
dataset(name = NULL, inherit = Dataset, ..., parent_env = parent.frame())+ +
| name | a name for the dataset. It is also used as the class for it. |
+
|---|---|
| inherit | +you can optionally inherit from a dataset when creating a +new dataset. |
+
| ... | +public methods for the dataset class |
+
| parent_env | +An environment to use as the parent of newly-created +objects. |
+
~torch.utils.data.DataLoader by default constructs an index
+sampler that yields integral indices. To make it work with a map-style
+dataset with non-integral indices/keys, a custom sampler must be provided.
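A minimal sketch (the data are purely illustrative; see the data-loading vignette above for a fuller example):

my_dataset <- dataset(
  name = "my_dataset",
  initialize = function(data) {
    self$data <- torch_tensor(data)
  },
  .getitem = function(index) {
    self$data[index, ]
  },
  .length = function() {
    self$data$size()[[1]]
  }
)

ds <- my_dataset(matrix(runif(20), ncol = 2))
ds$.length()
ds$.getitem(1)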
Gets and sets the default floating point dtype.
+torch_set_default_dtype(d) + +torch_get_default_dtype()+ +
| d | +The default floating point dtype to set. Initially set to
+ |
+
|---|
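For example, a minimal sketch (torch_float() is the usual initial default):

torch_get_default_dtype()
torch_set_default_dtype(torch_float64())
torch_tensor(1.5)$dtype
torch_set_default_dtype(torch_float()) # restore the usual default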
Enumerate an iterator
+# S3 method for dataloader +enumerate(x, max_len = 1e+06, ...)+ +
| x | +the generator to enumerate. |
+
|---|---|
| max_len | +maximum number of iterations. |
+
| ... | +passed to specific methods. |
+
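A minimal sketch, assuming a small tensor_dataset purely for illustration:

ds <- tensor_dataset(x = torch_randn(20, 4), y = torch_randn(20))
dl <- dataloader(ds, batch_size = 10)

for (batch in enumerate(dl)) {
  print(batch[[1]]$shape)
}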
All functions
Installs Torch and its dependencies.
+install_torch( + version = "1.5.0", + type = install_type(version = version), + reinstall = FALSE, + path = install_path(), + ... +)+ +
| version | +The Torch version to install. |
+
|---|---|
| type | +The installation type for Torch. Valid values are |
+
| reinstall | +Re-install Torch even if it's already installed? |
+
| path | +Optional path to install or check for an already existing installation. |
+
| ... | +other optional arguments (like |
+
When using path to install in a specific location, make sure the TORCH_HOME environment
+variable is set to this same path to reuse this installation. The TORCH_INSTALL environment
+variable can be set to 0 to prevent auto-installing torch and TORCH_LOAD set to 0
+to avoid loading dependencies automatically. These environment variables are meant for advanced use
+cases and troubleshooting only.
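For example, a minimal sketch (the custom path below is purely illustrative):

install_torch()

# install to a custom location and point TORCH_HOME at it
Sys.setenv(TORCH_HOME = "~/torch-install")
install_torch(path = "~/torch-install")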
This function should only be used to load models saved in python.
+For it to work correctly you need to use torch.save with the flag:
+_use_new_zipfile_serialization=True and also remove all nn.Parameter
+classes from the tensors in the dict.
load_state_dict(path)+ +
| path | +to the state dict file |
+
|---|
a named list of tensors.
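For example, a minimal sketch ("model_weights.pt" is an illustrative path to a state dict exported from Python as described above):

state <- load_state_dict("model_weights.pt")
names(state) # names of the tensors in the dict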
+The above might change with development of this +in pytorch's C++ api.
+ +R/nn-pooling.R
+ nn_adaptive_avg_pool1d.RdThe output size is H, for any input size. +The number of output features is equal to the number of input planes.
+nn_adaptive_avg_pool1d(output_size)+ +
| output_size | +the target output size H |
+
|---|
+if (torch_is_installed()) { +# target output size of 5 +m = nn_adaptive_avg_pool1d(5) +input <- torch_randn(1, 64, 8) +output <- m(input) + +}
R/nn-pooling.R
+ nn_adaptive_avg_pool2d.RdThe output is of size H x W, for any input size. +The number of output features is equal to the number of input planes.
+nn_adaptive_avg_pool2d(output_size)+ +
| output_size | +the target output size of the image of the form H x W.
+Can be a tuple (H, W) or a single H for a square image H x H.
+H and W can be either a |
+
|---|
+if (torch_is_installed()) { +# target output size of 5x7 +m <- nn_adaptive_avg_pool2d(c(5,7)) +input <- torch_randn(1, 64, 8, 9) +output <- m(input) +# target output size of 7x7 (square) +m <- nn_adaptive_avg_pool2d(7) +input <- torch_randn(1, 64, 10, 9) +output <- m(input) + +}
R/nn-pooling.R
+ nn_adaptive_avg_pool3d.RdThe output is of size D x H x W, for any input size. +The number of output features is equal to the number of input planes.
+nn_adaptive_avg_pool3d(output_size)+ +
| output_size | +the target output size of the form D x H x W.
+Can be a tuple (D, H, W) or a single number D for a cube D x D x D.
+D, H and W can be either a |
+
|---|
+if (torch_is_installed()) { +# target output size of 5x7x9 +m <- nn_adaptive_avg_pool3d(c(5,7,9)) +input <- torch_randn(1, 64, 8, 9, 10) +output <- m(input) +# target output size of 7x7x7 (cube) +m <- nn_adaptive_avg_pool3d(7) +input <- torch_randn(1, 64, 10, 9, 8) +output <- m(input) + +}
R/nn_adaptive.R
+ nn_adaptive_log_softmax_with_loss.RdEfficient softmax approximation as described in +Efficient softmax approximation for GPUs by Edouard Grave, Armand Joulin, Moustapha Cissé, David Grangier, and Hervé Jégou
+nn_adaptive_log_softmax_with_loss( + in_features, + n_classes, + cutoffs, + div_value = 4, + head_bias = FALSE +)+ +
| in_features | +(int): Number of features in the input tensor |
+
|---|---|
| n_classes | +(int): Number of classes in the dataset |
+
| cutoffs | +(Sequence): Cutoffs used to assign targets to their buckets |
+
| div_value | +(float, optional): value used as an exponent to compute sizes +of the clusters. Default: 4.0 |
+
| head_bias | +(bool, optional): If |
+
NamedTuple with output and loss fields:
output is a Tensor of size N containing computed target
+log probabilities for each example
loss is a Scalar representing the computed negative +log likelihood loss
Adaptive softmax is an approximate strategy for training models with large +output spaces. It is most effective when the label distribution is highly +imbalanced, for example in natural language modelling, where the word +frequency distribution approximately follows the Zipf's law.
+Adaptive softmax partitions the labels into several clusters, according to +their frequency. These clusters may contain different number of targets +each.
+Additionally, clusters containing less frequent labels assign lower +dimensional embeddings to those labels, which speeds up the computation. +For each minibatch, only clusters for which at least one target is +present are evaluated.
+The idea is that the clusters which are accessed frequently +(like the first one, containing most frequent labels), should also be cheap +to compute -- that is, contain a small number of assigned labels. +We highly recommend taking a look at the original paper for more details.
cutoffs should be an ordered Sequence of integers sorted
+in the increasing order.
+It controls number of clusters and the partitioning of targets into
+clusters. For example setting cutoffs = c(10, 100, 1000)
+means that first 10 targets will be assigned
+to the 'head' of the adaptive softmax, targets 11, 12, ..., 100 will be
+assigned to the first cluster, and targets 101, 102, ..., 1000 will be
+assigned to the second cluster, while targets
+1001, 1002, ..., n_classes - 1 will be assigned
+to the last, third cluster.
div_value is used to compute the size of each additional cluster,
+which is given as
+\(\left\lfloor\frac{\mbox{in\_features}}{\mbox{div\_value}^{idx}}\right\rfloor\),
+where \(idx\) is the cluster index (with clusters
+for less frequent words having larger indices,
+and indices starting from \(1\)).
head_bias if set to TRUE, adds a bias term to the 'head' of the
+adaptive softmax. See the paper for details. Set to FALSE in the official
+implementation.
This module returns a NamedTuple with output
+and loss fields. See further documentation for details.
To compute log-probabilities for all classes, the log_prob
+method can be used.
Labels passed as inputs to this module should be sorted according to
+their frequency. This means that the most frequent label should be
+represented by the index 0, and the least frequent
+label should be represented by the index n_classes - 1.
input: \((N, \mbox{in\_features})\)
target: \((N)\) where each value satisfies \(0 <= \mbox{target[i]} <= \mbox{n\_classes}\)
output1: \((N)\)
output2: Scalar
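As a minimal usage sketch (the feature count, class count, and cutoffs below are illustrative, not taken from the paper), the module can be built and queried for full log-probabilities via its log_prob method:

if (torch_is_installed()) {
  # illustrative sizes: 50 input features, 100 classes, clusters cut at 20 and 60
  asm <- nn_adaptive_log_softmax_with_loss(50, 100, cutoffs = c(20, 60))
  input <- torch_randn(8, 50)
  # log-probabilities over all 100 classes, one row per example
  logp <- asm$log_prob(input)
  logp$shape
}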
R/nn-pooling.R
+ nn_adaptive_max_pool1d.RdThe output size is H, for any input size. +The number of output features is equal to the number of input planes.
+nn_adaptive_max_pool1d(output_size, return_indices = FALSE)+ +
| output_size | +the target output size H |
+
|---|---|
| return_indices | +if |
+
+if (torch_is_installed()) { +# target output size of 5 +m <- nn_adaptive_max_pool1d(5) +input <- torch_randn(1, 64, 8) +output <- m(input) + +}
R/nn-pooling.R
+ nn_adaptive_max_pool2d.RdThe output is of size H x W, for any input size. +The number of output features is equal to the number of input planes.
+nn_adaptive_max_pool2d(output_size, return_indices = FALSE)+ +
| output_size | +the target output size of the image of the form H x W.
+Can be a tuple |
+
|---|---|
| return_indices | +if |
+
+if (torch_is_installed()) { +# target output size of 5x7 +m <- nn_adaptive_max_pool2d(c(5,7)) +input <- torch_randn(1, 64, 8, 9) +output <- m(input) +# target output size of 7x7 (square) +m <- nn_adaptive_max_pool2d(7) +input <- torch_randn(1, 64, 10, 9) +output <- m(input) + +}
R/nn-pooling.R
+ nn_adaptive_max_pool3d.RdThe output is of size D x H x W, for any input size. +The number of output features is equal to the number of input planes.
+nn_adaptive_max_pool3d(output_size, return_indices = FALSE)+ +
| output_size | +the target output size of the image of the form D x H x W.
+Can be a tuple (D, H, W) or a single D for a cube D x D x D.
+D, H and W can be either a |
+
|---|---|
| return_indices | +if |
+
+if (torch_is_installed()) { +# target output size of 5x7x9 +m <- nn_adaptive_max_pool3d(c(5,7,9)) +input <- torch_randn(1, 64, 8, 9, 10) +output <- m(input) +# target output size of 7x7x7 (cube) +m <- nn_adaptive_max_pool3d(7) +input <- torch_randn(1, 64, 10, 9, 8) +output <- m(input) + +}
R/nn-pooling.R
+ nn_avg_pool1d.RdIn the simplest case, the output value of the layer with input size \((N, C, L)\),
+output \((N, C, L_{out})\) and kernel_size \(k\)
+can be precisely described as:
$$ + \text{out}(N_i, C_j, l) = \frac{1}{k} \sum_{m=0}^{k-1} +\text{input}(N_i, C_j, \text{stride} \times l + m) +$$
+nn_avg_pool1d( + kernel_size, + stride = NULL, + padding = 0, + ceil_mode = FALSE, + count_include_pad = TRUE +)+ +
| kernel_size | +the size of the window |
+
|---|---|
| stride | +the stride of the window. Default value is |
+
| padding | +implicit zero padding to be added on both sides |
+
| ceil_mode | +when TRUE, will use |
+
| count_include_pad | +when TRUE, will include the zero-padding in the averaging calculation |
+
If padding is non-zero, then the input is implicitly zero-padded on both sides
+for padding number of points.
The parameters kernel_size, stride, padding can each be
+an int or a one-element tuple.
Input: \((N, C, L_{in})\)
Output: \((N, C, L_{out})\), where
$$ + L_{out} = \left\lfloor \frac{L_{in} + + 2 \times \text{padding} - \text{kernel\_size}}{\text{stride}} + 1\right\rfloor +$$
+ ++if (torch_is_installed()) { + +# pool with window of size=3, stride=2 +m <- nn_avg_pool1d(3, stride=2) +m(torch_randn(1, 1, 8)) + +}
R/nn-pooling.R
+ nn_avg_pool2d.RdIn the simplest case, the output value of the layer with input size \((N, C, H, W)\),
+output \((N, C, H_{out}, W_{out})\) and kernel_size \((kH, kW)\)
+can be precisely described as:
$$ + out(N_i, C_j, h, w) = \frac{1}{kH * kW} \sum_{m=0}^{kH-1} \sum_{n=0}^{kW-1} +input(N_i, C_j, stride[0] \times h + m, stride[1] \times w + n) +$$
+nn_avg_pool2d( + kernel_size, + stride = NULL, + padding = 0, + ceil_mode = FALSE, + count_include_pad = TRUE, + divisor_override = NULL +)+ +
| kernel_size | +the size of the window |
+
|---|---|
| stride | +the stride of the window. Default value is |
+
| padding | +implicit zero padding to be added on both sides |
+
| ceil_mode | +when TRUE, will use |
+
| count_include_pad | +when TRUE, will include the zero-padding in the averaging calculation |
+
| divisor_override | +if specified, it will be used as divisor, otherwise |
+
If padding is non-zero, then the input is implicitly zero-padded on both sides
+for padding number of points.
The parameters kernel_size, stride, padding can either be:
a single int -- in which case the same value is used for the height and width dimension
a tuple of two ints -- in which case, the first int is used for the height dimension,
+and the second int for the width dimension
Input: \((N, C, H_{in}, W_{in})\)
Output: \((N, C, H_{out}, W_{out})\), where
$$ + H_{out} = \left\lfloor\frac{H_{in} + 2 \times \text{padding}[0] - + \text{kernel\_size}[0]}{\text{stride}[0]} + 1\right\rfloor +$$ +$$ + W_{out} = \left\lfloor\frac{W_{in} + 2 \times \text{padding}[1] - + \text{kernel\_size}[1]}{\text{stride}[1]} + 1\right\rfloor +$$
+ ++if (torch_is_installed()) { + +# pool of square window of size=3, stride=2 +m <- nn_avg_pool2d(3, stride=2) +# pool of non-square window +m <- nn_avg_pool2d(c(3, 2), stride=c(2, 1)) +input <- torch_randn(20, 16, 50, 32) +output <- m(input) + +}
R/nn-pooling.R
+ nn_avg_pool3d.RdIn the simplest case, the output value of the layer with input size \((N, C, D, H, W)\),
+output \((N, C, D_{out}, H_{out}, W_{out})\) and kernel_size \((kD, kH, kW)\)
+can be precisely described as:
$$ + \begin{aligned} +\text{out}(N_i, C_j, d, h, w) ={} & \sum_{k=0}^{kD-1} \sum_{m=0}^{kH-1} \sum_{n=0}^{kW-1} \\ +& \frac{\text{input}(N_i, C_j, \text{stride}[0] \times d + k, + \text{stride}[1] \times h + m, \text{stride}[2] \times w + n)} +{kD \times kH \times kW} +\end{aligned} +$$
+nn_avg_pool3d( + kernel_size, + stride = NULL, + padding = 0, + ceil_mode = FALSE, + count_include_pad = TRUE, + divisor_override = NULL +)+ +
| kernel_size | +the size of the window |
+
|---|---|
| stride | +the stride of the window. Default value is |
+
| padding | +implicit zero padding to be added on all three sides |
+
| ceil_mode | +when TRUE, will use |
+
| count_include_pad | +when TRUE, will include the zero-padding in the averaging calculation |
+
| divisor_override | +if specified, it will be used as divisor, otherwise |
+
If padding is non-zero, then the input is implicitly zero-padded on all three sides
+for padding number of points.
The parameters kernel_size, stride can either be:
a single int -- in which case the same value is used for the depth, height and width dimension
a tuple of three ints -- in which case, the first int is used for the depth dimension,
+the second int for the height dimension and the third int for the width dimension
Input: \((N, C, D_{in}, H_{in}, W_{in})\)
Output: \((N, C, D_{out}, H_{out}, W_{out})\), where
$$ + D_{out} = \left\lfloor\frac{D_{in} + 2 \times \text{padding}[0] - + \text{kernel\_size}[0]}{\text{stride}[0]} + 1\right\rfloor +$$ +$$ + H_{out} = \left\lfloor\frac{H_{in} + 2 \times \text{padding}[1] - + \text{kernel\_size}[1]}{\text{stride}[1]} + 1\right\rfloor +$$ +$$ + W_{out} = \left\lfloor\frac{W_{in} + 2 \times \text{padding}[2] - + \text{kernel\_size}[2]}{\text{stride}[2]} + 1\right\rfloor +$$
+if (torch_is_installed()) { + +# pool of square window of size=3, stride=2 +m <- nn_avg_pool3d(3, stride=2) +# pool of non-square window +m <- nn_avg_pool3d(c(3, 2, 2), stride=c(2, 1, 2)) +input <- torch_randn(20, 16, 50, 44, 31) +output <- m(input) + +}
Applies Batch Normalization over a 2D or 3D input (a mini-batch of 1D +inputs with optional additional channel dimension) as described in the paper +Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift
+nn_batch_norm1d( + num_features, + eps = 1e-05, + momentum = 0.1, + affine = TRUE, + track_running_stats = TRUE +)+ +
| num_features | +\(C\) from an expected input of size +\((N, C, L)\) or \(L\) from input of size \((N, L)\) |
+
|---|---|
| eps | +a value added to the denominator for numerical stability. +Default: 1e-5 |
+
| momentum | +the value used for the running_mean and running_var
+computation. Can be set to |
+
| affine | +a boolean value that when set to |
+
| track_running_stats | +a boolean value that when set to |
+
$$ +y = \frac{x - \mathrm{E}[x]}{\sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta +$$
+The mean and standard-deviation are calculated per-dimension over
+the mini-batches and \(\gamma\) and \(\beta\) are learnable parameter vectors
+of size C (where C is the input size). By default, the elements of \(\gamma\)
+are set to 1 and the elements of \(\beta\) are set to 0.
Also by default, during training this layer keeps running estimates of its
+computed mean and variance, which are then used for normalization during
+evaluation. The running estimates are kept with a default momentum
+of 0.1.
+If track_running_stats is set to FALSE, this layer then does not
+keep running estimates, and batch statistics are instead used during
+evaluation time as well.
This momentum argument is different from the one used in optimizer
+classes and the conventional notion of momentum. Mathematically, the
+update rule for running statistics here is
+\(\hat{x}_{\mbox{new}} = (1 - \mbox{momentum}) \times \hat{x} + \mbox{momentum} \times x_t\),
+where \(\hat{x}\) is the estimated statistic and \(x_t\) is the
+new observed value.
Because the Batch Normalization is done over the C dimension, computing statistics
+on (N, L) slices, it's common terminology to call this Temporal Batch Normalization.
Input: \((N, C)\) or \((N, C, L)\)
Output: \((N, C)\) or \((N, C, L)\) (same shape as input)
+if (torch_is_installed()) { +# With Learnable Parameters +m <- nn_batch_norm1d(100) +# Without Learnable Parameters +m <- nn_batch_norm1d(100, affine = FALSE) +input <- torch_randn(20, 100) +output <- m(input) + +}
Applies Batch Normalization over a 4D input (a mini-batch of 2D inputs +with an additional channel dimension) as described in the paper +Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift.
+nn_batch_norm2d( + num_features, + eps = 1e-05, + momentum = 0.1, + affine = TRUE, + track_running_stats = TRUE +)+ +
| num_features | +\(C\) from an expected input of size +\((N, C, H, W)\) |
+
|---|---|
| eps | +a value added to the denominator for numerical stability. +Default: 1e-5 |
+
| momentum | +the value used for the running_mean and running_var
+computation. Can be set to |
+
| affine | +a boolean value that when set to |
+
| track_running_stats | +a boolean value that when set to |
+
$$ + y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta +$$
+The mean and standard-deviation are calculated per-dimension over
+the mini-batches and \(\gamma\) and \(\beta\) are learnable parameter vectors
+of size C (where C is the input size). By default, the elements of \(\gamma\) are set
+to 1 and the elements of \(\beta\) are set to 0. The standard-deviation is calculated
+via the biased estimator, equivalent to torch_var(input, unbiased=FALSE).
+Also by default, during training this layer keeps running estimates of its
+computed mean and variance, which are then used for normalization during
+evaluation. The running estimates are kept with a default momentum
+of 0.1.
If track_running_stats is set to FALSE, this layer then does not
+keep running estimates, and batch statistics are instead used during
+evaluation time as well.
This momentum argument is different from the one used in optimizer
+classes and the conventional notion of momentum. Mathematically, the
+update rule for running statistics here is
+\(\hat{x}_{\mbox{new}} = (1 - \mbox{momentum}) \times \hat{x} + \mbox{momentum} \times x_t\),
+where \(\hat{x}\) is the estimated statistic and \(x_t\) is the
+new observed value.
+Because the Batch Normalization is done over the C dimension, computing statistics
+on (N, H, W) slices, it's common terminology to call this Spatial Batch Normalization.
Input: \((N, C, H, W)\)
Output: \((N, C, H, W)\) (same shape as input)
+if (torch_is_installed()) { +# With Learnable Parameters +m <- nn_batch_norm2d(100) +# Without Learnable Parameters +m <- nn_batch_norm2d(100, affine=FALSE) +input <- torch_randn(20, 100, 35, 45) +output <- m(input) + +}
Creates a criterion that measures the Binary Cross Entropy +between the target and the output:
+nn_bce_loss(weight = NULL, reduction = "mean")+ +
| weight | +(Tensor, optional): a manual rescaling weight given to the loss
+of each batch element. If given, has to be a Tensor of size |
+
|---|---|
| reduction | +(string, optional): Specifies the reduction to apply to the output:
+ |
+
The unreduced (i.e. with reduction set to 'none') loss can be described as:
+$$
+ \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad
+l_n = - w_n \left[ y_n \cdot \log x_n + (1 - y_n) \cdot \log (1 - x_n) \right]
+$$
+where \(N\) is the batch size. If reduction is not 'none'
+(default 'mean'), then
$$ + \ell(x, y) = \left\{ \begin{array}{ll} +\mbox{mean}(L), & \mbox{if reduction} = \mbox{'mean';}\\ +\mbox{sum}(L), & \mbox{if reduction} = \mbox{'sum'.} +\end{array} +\right. +$$
+This is used for measuring the error of a reconstruction in, for example, +an auto-encoder. Note that the targets \(y\) should be numbers +between 0 and 1.
+Notice that if \(x_n\) is either 0 or 1, one of the log terms would be +mathematically undefined in the above loss equation. PyTorch chooses to set +\(\log (0) = -\infty\), since \(\lim_{x\to 0} \log (x) = -\infty\).
+However, an infinite term in the loss equation is not desirable for several reasons. +For one, if either \(y_n = 0\) or \((1 - y_n) = 0\), then we would be +multiplying 0 with infinity. Secondly, if we have an infinite loss value, then +we would also have an infinite term in our gradient, since +\(\lim_{x\to 0} \frac{d}{dx} \log (x) = \infty\).
+This would make BCELoss's backward method nonlinear with respect to \(x_n\), +and using it for things like linear regression would not be straight-forward. +Our solution is that BCELoss clamps its log function outputs to be greater than +or equal to -100. This way, we can always have a finite loss value and a linear +backward method.
+Input: \((N, *)\) where \(*\) means, any number of additional +dimensions
Target: \((N, *)\), same shape as the input
Output: scalar. If reduction is 'none', then \((N, *)\), same
+shape as input.
+if (torch_is_installed()) { +m <- nn_sigmoid() +loss <- nn_bce_loss() +input <- torch_randn(3, requires_grad=TRUE) +target <- torch_rand(3) +output <- loss(m(input), target) +output$backward() + +}
Applies a bilinear transformation to the incoming data +\(y = x_1^T A x_2 + b\)
+nn_bilinear(in1_features, in2_features, out_features, bias = TRUE)+ +
| in1_features | +size of each first input sample |
+
|---|---|
| in2_features | +size of each second input sample |
+
| out_features | +size of each output sample |
+
| bias | +If set to |
+
Input1: \((N, *, H_{in1})\) \(H_{in1}=\mbox{in1\_features}\) and +\(*\) means any number of additional dimensions. All but the last +dimension of the inputs should be the same.
Input2: \((N, *, H_{in2})\) where \(H_{in2}=\mbox{in2\_features}\).
Output: \((N, *, H_{out})\) where \(H_{out}=\mbox{out\_features}\) +and all but the last dimension are the same shape as the input.
weight: the learnable weights of the module of shape +\((\mbox{out\_features}, \mbox{in1\_features}, \mbox{in2\_features})\). +The values are initialized from \(\mathcal{U}(-\sqrt{k}, \sqrt{k})\), where +\(k = \frac{1}{\mbox{in1\_features}}\)
bias: the learnable bias of the module of shape \((\mbox{out\_features})\).
+If bias is TRUE, the values are initialized from
+\(\mathcal{U}(-\sqrt{k}, \sqrt{k})\), where
+\(k = \frac{1}{\mbox{in1\_features}}\)
+if (torch_is_installed()) { +m <- nn_bilinear(20, 30, 50) +input1 <- torch_randn(128, 20) +input2 <- torch_randn(128, 30) +output <- m(input1, input2) +print(output$size()) + +}
Applies the element-wise function:
+nn_celu(alpha = 1, inplace = FALSE)+ +
| alpha | +the \(\alpha\) value for the CELU formulation. Default: 1.0 |
+
|---|---|
| inplace | +can optionally do the operation in-place. Default: |
+
$$ + \mbox{CELU}(x) = \max(0,x) + \min(0, \alpha * (\exp(x/\alpha) - 1)) +$$
+More details can be found in the paper +Continuously Differentiable Exponential Linear Units.
+Input: \((N, *)\) where * means, any number of additional
+dimensions
Output: \((N, *)\), same shape as the input
+
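A short usage sketch with an illustrative input, in the same style as the other activation pages:

if (torch_is_installed()) {
  m <- nn_celu()
  input <- torch_randn(2)
  output <- m(input)
}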
Applies a 1D convolution over an input signal composed of several input +planes. +In the simplest case, the output value of the layer with input size +\((N, C_{\mbox{in}}, L)\) and output \((N, C_{\mbox{out}}, L_{\mbox{out}})\) can be +precisely described as:
+nn_conv1d( + in_channels, + out_channels, + kernel_size, + stride = 1, + padding = 0, + dilation = 1, + groups = 1, + bias = TRUE, + padding_mode = "zeros" +)+ +
| in_channels | +(int): Number of channels in the input image |
+
|---|---|
| out_channels | +(int): Number of channels produced by the convolution |
+
| kernel_size | +(int or tuple): Size of the convolving kernel |
+
| stride | +(int or tuple, optional): Stride of the convolution. Default: 1 |
+
| padding | +(int or tuple, optional): Zero-padding added to both sides of +the input. Default: 0 |
+
| dilation | +(int or tuple, optional): Spacing between kernel +elements. Default: 1 |
+
| groups | +(int, optional): Number of blocked connections from input +channels to output channels. Default: 1 |
+
| bias | +(bool, optional): If |
+
| padding_mode | +(string, optional): |
+
$$ +\mbox{out}(N_i, C_{\mbox{out}_j}) = \mbox{bias}(C_{\mbox{out}_j}) + + \sum_{k = 0}^{C_{in} - 1} \mbox{weight}(C_{\mbox{out}_j}, k) +\star \mbox{input}(N_i, k) +$$
+where \(\star\) is the valid +cross-correlation operator, +\(N\) is a batch size, \(C\) denotes a number of channels, +\(L\) is a length of signal sequence.
stride controls the stride for the cross-correlation, a single
+number or a one-element tuple.
padding controls the amount of implicit zero-paddings on both sides
+for padding number of points.
dilation controls the spacing between the kernel points; also
+known as the à trous algorithm. It is harder to describe, but this
+link
+has a nice visualization of what dilation does.
groups controls the connections between inputs and outputs.
+in_channels and out_channels must both be divisible by
+groups. For example,
At groups=1, all inputs are convolved to all outputs.
At groups=2, the operation becomes equivalent to having two conv +layers side by side, each seeing half the input channels, +and producing half the output channels, and both subsequently +concatenated.
At groups= in_channels, each input channel is convolved with
+its own set of filters,
+of size \(\left\lfloor\frac{out\_channels}{in\_channels}\right\rfloor\).
Depending on the size of your kernel, several (of the last)
+columns of the input might be lost, because it is a valid
+cross-correlation, and not a full cross-correlation.
+It is up to the user to add proper padding.
When groups == in_channels and out_channels == K * in_channels,
+where K is a positive integer, this operation is also known in the
+literature as a depthwise convolution.
+In other words, for an input of size \((N, C_{in}, L_{in})\),
+a depthwise convolution with a depthwise multiplier K, can be constructed by arguments
+\((C_{\mbox{in}}=C_{in}, C_{\mbox{out}}=C_{in} \times K, ..., \mbox{groups}=C_{in})\).
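For instance, a short sketch of that construction (the channel counts are illustrative): with 8 input channels and a depthwise multiplier K = 2, use out_channels = 16 and groups = 8.

if (torch_is_installed()) {
  # depthwise 1D convolution: each of the 8 input channels gets its own 2 filters
  m <- nn_conv1d(in_channels = 8, out_channels = 16, kernel_size = 3, groups = 8)
  input <- torch_randn(1, 8, 20)
  output <- m(input)  # shape (1, 16, 18)
}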
Input: \((N, C_{in}, L_{in})\)
Output: \((N, C_{out}, L_{out})\) where
$$ + L_{out} = \left\lfloor\frac{L_{in} + 2 \times \mbox{padding} - \mbox{dilation} + \times (\mbox{kernel\_size} - 1) - 1}{\mbox{stride}} + 1\right\rfloor +$$
+weight (Tensor): the learnable weights of the module of shape +\((\mbox{out\_channels}, \frac{\mbox{in\_channels}}{\mbox{groups}}, \mbox{kernel\_size})\). +The values of these weights are sampled from +\(\mathcal{U}(-\sqrt{k}, \sqrt{k})\) where +\(k = \frac{groups}{C_{\mbox{in}} * \mbox{kernel\_size}}\)
bias (Tensor): the learnable bias of the module of shape
+(out_channels). If bias is TRUE, then the values of these weights are
+sampled from \(\mathcal{U}(-\sqrt{k}, \sqrt{k})\) where
+\(k = \frac{groups}{C_{\mbox{in}} * \mbox{kernel\_size}}\)
+if (torch_is_installed()) { +m <- nn_conv1d(16, 33, 3, stride=2) +input <- torch_randn(20, 16, 50) +output <- m(input) + +}
Applies a 2D convolution over an input signal composed of several input +planes.
+nn_conv2d( + in_channels, + out_channels, + kernel_size, + stride = 1, + padding = 0, + dilation = 1, + groups = 1, + bias = TRUE, + padding_mode = "zeros" +)+ +
| in_channels | +(int): Number of channels in the input image |
+
|---|---|
| out_channels | +(int): Number of channels produced by the convolution |
+
| kernel_size | +(int or tuple): Size of the convolving kernel |
+
| stride | +(int or tuple, optional): Stride of the convolution. Default: 1 |
+
| padding | +(int or tuple, optional): Zero-padding added to both sides of +the input. Default: 0 |
+
| dilation | +(int or tuple, optional): Spacing between kernel elements. Default: 1 |
+
| groups | +(int, optional): Number of blocked connections from input +channels to output channels. Default: 1 |
+
| bias | +(bool, optional): If |
+
| padding_mode | +(string, optional): |
+
In the simplest case, the output value of the layer with input size +\((N, C_{\mbox{in}}, H, W)\) and output \((N, C_{\mbox{out}}, H_{\mbox{out}}, W_{\mbox{out}})\) +can be precisely described as:
+$$ +\mbox{out}(N_i, C_{\mbox{out}_j}) = \mbox{bias}(C_{\mbox{out}_j}) + + \sum_{k = 0}^{C_{\mbox{in}} - 1} \mbox{weight}(C_{\mbox{out}_j}, k) \star \mbox{input}(N_i, k) +$$
+where \(\star\) is the valid 2D cross-correlation operator, +\(N\) is a batch size, \(C\) denotes a number of channels, +\(H\) is a height of input planes in pixels, and \(W\) is +width in pixels.
stride controls the stride for the cross-correlation, a single
+number or a tuple.
padding controls the amount of implicit zero-paddings on both
+sides for padding number of points for each dimension.
dilation controls the spacing between the kernel points; also
+known as the à trous algorithm. It is harder to describe, but this link
+has a nice visualization of what dilation does.
groups controls the connections between inputs and outputs.
+in_channels and out_channels must both be divisible by
+groups. For example,
At groups=1, all inputs are convolved to all outputs.
At groups=2, the operation becomes equivalent to having two conv +layers side by side, each seeing half the input channels, +and producing half the output channels, and both subsequently +concatenated.
At groups= in_channels, each input channel is convolved with
+its own set of filters, of size:
+\(\left\lfloor\frac{out\_channels}{in\_channels}\right\rfloor\).
The parameters kernel_size, stride, padding, dilation can either be:
a single int -- in which case the same value is used for the height and
+width dimension
a tuple of two ints -- in which case, the first int is used for the height dimension,
+and the second int for the width dimension
Depending on the size of your kernel, several (of the last) +columns of the input might be lost, because it is a valid cross-correlation, +and not a full cross-correlation. +It is up to the user to add proper padding.
+When groups == in_channels and out_channels == K * in_channels,
+where K is a positive integer, this operation is also known in the
+literature as a depthwise convolution.
+In other words, for an input of size \((N, C_{in}, H_{in}, W_{in})\),
+a depthwise convolution with a depthwise multiplier K, can be constructed by arguments
+\((in\_channels=C_{in}, out\_channels=C_{in} \times K, ..., groups=C_{in})\).
In some circumstances when using the CUDA backend with CuDNN, this operator
+may select a nondeterministic algorithm to increase performance. If this is
+undesirable, you can try to make the operation deterministic (potentially at
+a performance cost) by setting backends_cudnn_deterministic = TRUE.
Input: \((N, C_{in}, H_{in}, W_{in})\)
Output: \((N, C_{out}, H_{out}, W_{out})\) where +$$ + H_{out} = \left\lfloor\frac{H_{in} + 2 \times \mbox{padding}[0] - \mbox{dilation}[0] + \times (\mbox{kernel\_size}[0] - 1) - 1}{\mbox{stride}[0]} + 1\right\rfloor +$$ +$$ + W_{out} = \left\lfloor\frac{W_{in} + 2 \times \mbox{padding}[1] - \mbox{dilation}[1] + \times (\mbox{kernel\_size}[1] - 1) - 1}{\mbox{stride}[1]} + 1\right\rfloor +$$
weight (Tensor): the learnable weights of the module of shape +\((\mbox{out\_channels}, \frac{\mbox{in\_channels}}{\mbox{groups}}\), +\(\mbox{kernel\_size[0]}, \mbox{kernel\_size[1]})\). +The values of these weights are sampled from +\(\mathcal{U}(-\sqrt{k}, \sqrt{k})\) where +\(k = \frac{groups}{C_{\mbox{in}} * \prod_{i=0}^{1}\mbox{kernel\_size}[i]}\)
bias (Tensor): the learnable bias of the module of shape
+(out_channels). If bias is TRUE,
+then the values of these weights are
+sampled from \(\mathcal{U}(-\sqrt{k}, \sqrt{k})\) where
+\(k = \frac{groups}{C_{\mbox{in}} * \prod_{i=0}^{1}\mbox{kernel\_size}[i]}\)
+if (torch_is_installed()) { + +# With square kernels and equal stride +m <- nn_conv2d(16, 33, 3, stride = 2) +# non-square kernels and unequal stride and with padding +m <- nn_conv2d(16, 33, c(3, 5), stride=c(2, 1), padding=c(4, 2)) +# non-square kernels and unequal stride and with padding and dilation +m <- nn_conv2d(16, 33, c(3, 5), stride=c(2, 1), padding=c(4, 2), dilation=c(3, 1)) +input <- torch_randn(20, 16, 50, 100) +output <- m(input) + +}
Applies a 3D convolution over an input signal composed of several input +planes. +In the simplest case, the output value of the layer with input size \((N, C_{in}, D, H, W)\) +and output \((N, C_{out}, D_{out}, H_{out}, W_{out})\) can be precisely described as:
+nn_conv3d( + in_channels, + out_channels, + kernel_size, + stride = 1, + padding = 0, + dilation = 1, + groups = 1, + bias = TRUE, + padding_mode = "zeros" +)+ +
| in_channels | +(int): Number of channels in the input image |
+
|---|---|
| out_channels | +(int): Number of channels produced by the convolution |
+
| kernel_size | +(int or tuple): Size of the convolving kernel |
+
| stride | +(int or tuple, optional): Stride of the convolution. Default: 1 |
+
| padding | +(int or tuple, optional): Zero-padding added to all three sides of the input. Default: 0 |
+
| dilation | +(int or tuple, optional): Spacing between kernel elements. Default: 1 |
+
| groups | +(int, optional): Number of blocked connections from input channels to output channels. Default: 1 |
+
| bias | +(bool, optional): If |
+
| padding_mode | +(string, optional): |
+
$$ + out(N_i, C_{out_j}) = bias(C_{out_j}) + + \sum_{k = 0}^{C_{in} - 1} weight(C_{out_j}, k) \star input(N_i, k) +$$
+where \(\star\) is the valid 3D cross-correlation operator
stride controls the stride for the cross-correlation.
padding controls the amount of implicit zero-paddings on both
+sides for padding number of points for each dimension.
dilation controls the spacing between the kernel points; also known as the à trous algorithm.
+It is harder to describe, but this link has a nice visualization of what dilation does.
groups controls the connections between inputs and outputs.
+in_channels and out_channels must both be divisible by
+groups. For example,
At groups=1, all inputs are convolved to all outputs.
At groups=2, the operation becomes equivalent to having two conv +layers side by side, each seeing half the input channels, +and producing half the output channels, and both subsequently +concatenated.
At groups= in_channels, each input channel is convolved with
+its own set of filters, of size
+\(\left\lfloor\frac{out\_channels}{in\_channels}\right\rfloor\).
The parameters kernel_size, stride, padding, dilation can either be:
a single int -- in which case the same value is used for the depth, height and width dimension
a tuple of three ints -- in which case, the first int is used for the depth dimension,
+the second int for the height dimension and the third int for the width dimension
Depending on the size of your kernel, several (of the last)
+columns of the input might be lost, because it is a valid cross-correlation,
+and not a full cross-correlation.
+It is up to the user to add proper padding.
When groups == in_channels and out_channels == K * in_channels,
+where K is a positive integer, this operation is also known in the
+literature as a depthwise convolution.
+In other words, for an input of size \((N, C_{in}, D_{in}, H_{in}, W_{in})\),
+a depthwise convolution with a depthwise multiplier K, can be constructed by arguments
+\((in\_channels=C_{in}, out\_channels=C_{in} \times K, ..., groups=C_{in})\).
In some circumstances when using the CUDA backend with CuDNN, this operator
+may select a nondeterministic algorithm to increase performance. If this is
+undesirable, you can try to make the operation deterministic (potentially at
+a performance cost) by setting backends_cudnn_deterministic = TRUE.
Input: \((N, C_{in}, D_{in}, H_{in}, W_{in})\)
Output: \((N, C_{out}, D_{out}, H_{out}, W_{out})\) where +$$ + D_{out} = \left\lfloor\frac{D_{in} + 2 \times \mbox{padding}[0] - \mbox{dilation}[0] + \times (\mbox{kernel\_size}[0] - 1) - 1}{\mbox{stride}[0]} + 1\right\rfloor + $$ +$$ + H_{out} = \left\lfloor\frac{H_{in} + 2 \times \mbox{padding}[1] - \mbox{dilation}[1] + \times (\mbox{kernel\_size}[1] - 1) - 1}{\mbox{stride}[1]} + 1\right\rfloor + $$ +$$ + W_{out} = \left\lfloor\frac{W_{in} + 2 \times \mbox{padding}[2] - \mbox{dilation}[2] + \times (\mbox{kernel\_size}[2] - 1) - 1}{\mbox{stride}[2]} + 1\right\rfloor + $$
weight (Tensor): the learnable weights of the module of shape +\((\mbox{out\_channels}, \frac{\mbox{in\_channels}}{\mbox{groups}},\) +\(\mbox{kernel\_size[0]}, \mbox{kernel\_size[1]}, \mbox{kernel\_size[2]})\). +The values of these weights are sampled from +\(\mathcal{U}(-\sqrt{k}, \sqrt{k})\) where +\(k = \frac{groups}{C_{\mbox{in}} * \prod_{i=0}^{2}\mbox{kernel\_size}[i]}\)
bias (Tensor): the learnable bias of the module of shape (out_channels). If bias is TRUE,
+then the values of these weights are
+sampled from \(\mathcal{U}(-\sqrt{k}, \sqrt{k})\) where
+\(k = \frac{groups}{C_{\mbox{in}} * \prod_{i=0}^{2}\mbox{kernel\_size}[i]}\)
+if (torch_is_installed()) { +# With square kernels and equal stride +m <- nn_conv3d(16, 33, 3, stride=2) +# non-square kernels and unequal stride and with padding +m <- nn_conv3d(16, 33, c(3, 5, 2), stride=c(2, 1, 1), padding=c(4, 2, 0)) +input <- torch_randn(20, 16, 10, 50, 100) +output <- m(input) + +}
Applies a 1D transposed convolution operator over an input image +composed of several input planes.
+nn_conv_transpose1d( + in_channels, + out_channels, + kernel_size, + stride = 1, + padding = 0, + output_padding = 0, + groups = 1, + bias = TRUE, + dilation = 1, + padding_mode = "zeros" +)+ +
| in_channels | +(int): Number of channels in the input image |
+
|---|---|
| out_channels | +(int): Number of channels produced by the convolution |
+
| kernel_size | +(int or tuple): Size of the convolving kernel |
+
| stride | +(int or tuple, optional): Stride of the convolution. Default: 1 |
+
| padding | +(int or tuple, optional): |
+
| output_padding | +(int or tuple, optional): Additional size added to one side +of the output shape. Default: 0 |
+
| groups | +(int, optional): Number of blocked connections from input channels to output channels. Default: 1 |
+
| bias | +(bool, optional): If |
+
| dilation | +(int or tuple, optional): Spacing between kernel elements. Default: 1 |
+
| padding_mode | +(string, optional): |
+
This module can be seen as the gradient of Conv1d with respect to its input. +It is also known as a fractionally-strided convolution or +a deconvolution (although it is not an actual deconvolution operation).
stride controls the stride for the cross-correlation.
padding controls the amount of implicit zero-paddings on both
+sides for dilation * (kernel_size - 1) - padding number of points. See note
+below for details.
output_padding controls the additional size added to one side
+of the output shape. See note below for details.
dilation controls the spacing between the kernel points; also known as the
+à trous algorithm. It is harder to describe, but this link
+has a nice visualization of what dilation does.
groups controls the connections between inputs and outputs.
+in_channels and out_channels must both be divisible by
+groups. For example,
At groups=1, all inputs are convolved to all outputs.
At groups=2, the operation becomes equivalent to having two conv +layers side by side, each seeing half the input channels, +and producing half the output channels, and both subsequently +concatenated.
At groups= in_channels, each input channel is convolved with
+its own set of filters (of size
+\(\left\lfloor\frac{out\_channels}{in\_channels}\right\rfloor\)).
Depending on the size of your kernel, several (of the last)
+columns of the input might be lost, because it is a valid cross-correlation,
+and not a full cross-correlation.
+It is up to the user to add proper padding.
The padding argument effectively adds dilation * (kernel_size - 1) - padding
+amount of zero padding to both sides of the input. This is set so that
+when a nn_conv1d and a nn_conv_transpose1d
+are initialized with the same parameters, they are inverses of each other in
+regard to the input and output shapes. However, when stride > 1,
+nn_conv1d maps multiple input shapes to the same output
+shape. output_padding is provided to resolve this ambiguity by
+effectively increasing the calculated output shape on one side. Note
+that output_padding is only used to find output shape, but does
+not actually add zero-padding to output.
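A small numeric sketch of this ambiguity (layer sizes are illustrative): with stride = 2, inputs of length 5 and 6 both map to a length-2 output, and output_padding selects which length the transposed convolution recovers.

if (torch_is_installed()) {
  down <- nn_conv1d(1, 1, kernel_size = 3, stride = 2)
  down(torch_randn(1, 1, 5))$shape  # 1 1 2
  down(torch_randn(1, 1, 6))$shape  # 1 1 2
  up <- nn_conv_transpose1d(1, 1, kernel_size = 3, stride = 2)
  up(torch_randn(1, 1, 2))$shape    # 1 1 5
  up1 <- nn_conv_transpose1d(1, 1, kernel_size = 3, stride = 2, output_padding = 1)
  up1(torch_randn(1, 1, 2))$shape   # 1 1 6
}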
In some circumstances when using the CUDA backend with CuDNN, this operator
+may select a nondeterministic algorithm to increase performance. If this is
+undesirable, you can try to make the operation deterministic (potentially at
+a performance cost) by setting backends_cudnn_deterministic = TRUE.
Input: \((N, C_{in}, L_{in})\)
Output: \((N, C_{out}, L_{out})\) where +$$ + L_{out} = (L_{in} - 1) \times \mbox{stride} - 2 \times \mbox{padding} + \mbox{dilation} +\times (\mbox{kernel\_size} - 1) + \mbox{output\_padding} + 1 +$$
weight (Tensor): the learnable weights of the module of shape +\((\mbox{in\_channels}, \frac{\mbox{out\_channels}}{\mbox{groups}},\) +\(\mbox{kernel\_size})\). +The values of these weights are sampled from +\(\mathcal{U}(-\sqrt{k}, \sqrt{k})\) where +\(k = \frac{groups}{C_{\mbox{out}} * \mbox{kernel\_size}}\)
bias (Tensor): the learnable bias of the module of shape (out_channels).
+If bias is TRUE, then the values of these weights are
+sampled from \(\mathcal{U}(-\sqrt{k}, \sqrt{k})\) where
+\(k = \frac{groups}{C_{\mbox{out}} * \mbox{kernel\_size}}\)
+if (torch_is_installed()) { +m <- nn_conv_transpose1d(32, 16, 2) +input <- torch_randn(10, 32, 2) +output <- m(input) + +}
Applies a 2D transposed convolution operator over an input image +composed of several input planes.
+nn_conv_transpose2d( + in_channels, + out_channels, + kernel_size, + stride = 1, + padding = 0, + output_padding = 0, + groups = 1, + bias = TRUE, + dilation = 1, + padding_mode = "zeros" +)+ +
| in_channels | +(int): Number of channels in the input image |
+
|---|---|
| out_channels | +(int): Number of channels produced by the convolution |
+
| kernel_size | +(int or tuple): Size of the convolving kernel |
+
| stride | +(int or tuple, optional): Stride of the convolution. Default: 1 |
+
| padding | +(int or tuple, optional): |
+
| output_padding | +(int or tuple, optional): Additional size added to one side +of each dimension in the output shape. Default: 0 |
+
| groups | +(int, optional): Number of blocked connections from input channels to output channels. Default: 1 |
+
| bias | +(bool, optional): If |
+
| dilation | +(int or tuple, optional): Spacing between kernel elements. Default: 1 |
+
| padding_mode | +(string, optional): |
+
This module can be seen as the gradient of Conv2d with respect to its input. +It is also known as a fractionally-strided convolution or +a deconvolution (although it is not an actual deconvolution operation).
stride controls the stride for the cross-correlation.
padding controls the amount of implicit zero-paddings on both
+sides for dilation * (kernel_size - 1) - padding number of points. See note
+below for details.
output_padding controls the additional size added to one side
+of the output shape. See note below for details.
dilation controls the spacing between the kernel points; also known as the à trous algorithm.
+It is harder to describe, but this link has a nice visualization of what dilation does.
groups controls the connections between inputs and outputs.
+in_channels and out_channels must both be divisible by
+groups. For example,
At groups=1, all inputs are convolved to all outputs.
At groups=2, the operation becomes equivalent to having two conv +layers side by side, each seeing half the input channels, +and producing half the output channels, and both subsequently +concatenated.
At groups= in_channels, each input channel is convolved with
+its own set of filters (of size
+\(\left\lfloor\frac{out\_channels}{in\_channels}\right\rfloor\)).
The parameters kernel_size, stride, padding, output_padding
+can either be:
a single int -- in which case the same value is used for the height and width dimensions
a tuple of two ints -- in which case, the first int is used for the height dimension,
+and the second int for the width dimension
Depending on the size of your kernel, several (of the last)
+columns of the input might be lost, because it is a valid cross-correlation,
+and not a full cross-correlation. It is up to the user to add proper padding.
The padding argument effectively adds dilation * (kernel_size - 1) - padding
+amount of zero padding to both sides of the input. This is set so that
+when a nn_conv2d and a nn_conv_transpose2d are initialized with the same
+parameters, they are inverses of each other in
+regard to the input and output shapes. However, when stride > 1,
+nn_conv2d maps multiple input shapes to the same output
+shape. output_padding is provided to resolve this ambiguity by
+effectively increasing the calculated output shape on one side. Note
+that output_padding is only used to find output shape, but does
+not actually add zero-padding to output.
In some circumstances when using the CUDA backend with CuDNN, this operator
+may select a nondeterministic algorithm to increase performance. If this is
+undesirable, you can try to make the operation deterministic (potentially at
+a performance cost) by setting backends_cudnn_deterministic = TRUE.
Input: \((N, C_{in}, H_{in}, W_{in})\)
Output: \((N, C_{out}, H_{out}, W_{out})\) where +$$ + H_{out} = (H_{in} - 1) \times \mbox{stride}[0] - 2 \times \mbox{padding}[0] + \mbox{dilation}[0] +\times (\mbox{kernel\_size}[0] - 1) + \mbox{output\_padding}[0] + 1 +$$ +$$ + W_{out} = (W_{in} - 1) \times \mbox{stride}[1] - 2 \times \mbox{padding}[1] + \mbox{dilation}[1] +\times (\mbox{kernel\_size}[1] - 1) + \mbox{output\_padding}[1] + 1 +$$
weight (Tensor): the learnable weights of the module of shape +\((\mbox{in\_channels}, \frac{\mbox{out\_channels}}{\mbox{groups}},\) +\(\mbox{kernel\_size[0]}, \mbox{kernel\_size[1]})\). +The values of these weights are sampled from +\(\mathcal{U}(-\sqrt{k}, \sqrt{k})\) where +\(k = \frac{groups}{C_{\mbox{out}} * \prod_{i=0}^{1}\mbox{kernel\_size}[i]}\)
bias (Tensor): the learnable bias of the module of shape (out_channels)
+If bias is TRUE, then the values of these weights are
+sampled from \(\mathcal{U}(-\sqrt{k}, \sqrt{k})\) where
+\(k = \frac{groups}{C_{\mbox{out}} * \prod_{i=0}^{1}\mbox{kernel\_size}[i]}\)
+if (torch_is_installed()) { +# With square kernels and equal stride +m <- nn_conv_transpose2d(16, 33, 3, stride=2) +# non-square kernels and unequal stride and with padding +m <- nn_conv_transpose2d(16, 33, c(3, 5), stride=c(2, 1), padding=c(4, 2)) +input <- torch_randn(20, 16, 50, 100) +output <- m(input) +# exact output size can be also specified as an argument +input <- torch_randn(1, 16, 12, 12) +downsample <- nn_conv2d(16, 16, 3, stride=2, padding=1) +upsample <- nn_conv_transpose2d(16, 16, 3, stride=2, padding=1) +h <- downsample(input) +h$size() +output <- upsample(h, output_size=input$size()) +output$size() + +}
Applies a 3D transposed convolution operator over an input image composed of several input +planes.
+nn_conv_transpose3d( + in_channels, + out_channels, + kernel_size, + stride = 1, + padding = 0, + output_padding = 0, + groups = 1, + bias = TRUE, + dilation = 1, + padding_mode = "zeros" +)+ +
| in_channels | +(int): Number of channels in the input image |
+
|---|---|
| out_channels | +(int): Number of channels produced by the convolution |
+
| kernel_size | +(int or tuple): Size of the convolving kernel |
+
| stride | +(int or tuple, optional): Stride of the convolution. Default: 1 |
+
| padding | +(int or tuple, optional): |
+
| output_padding | +(int or tuple, optional): Additional size added to one side +of each dimension in the output shape. Default: 0 |
+
| groups | +(int, optional): Number of blocked connections from input channels to output channels. Default: 1 |
+
| bias | +(bool, optional): If |
+
| dilation | +(int or tuple, optional): Spacing between kernel elements. Default: 1 |
+
| padding_mode | +(string, optional): |
+
The transposed convolution operator multiplies each input value element-wise by a learnable kernel, +and sums over the outputs from all input feature planes.
+This module can be seen as the gradient of Conv3d with respect to its input. +It is also known as a fractionally-strided convolution or +a deconvolution (although it is not an actual deconvolution operation).
stride controls the stride for the cross-correlation.
padding controls the amount of implicit zero-paddings on both
+sides for dilation * (kernel_size - 1) - padding number of points. See note
+below for details.
output_padding controls the additional size added to one side
+of the output shape. See note below for details.
dilation controls the spacing between the kernel points; also known as the à trous algorithm.
+It is harder to describe, but this link has a nice visualization of what dilation does.
groups controls the connections between inputs and outputs.
+in_channels and out_channels must both be divisible by
+groups. For example,
At groups=1, all inputs are convolved to all outputs.
At groups=2, the operation becomes equivalent to having two conv +layers side by side, each seeing half the input channels, +and producing half the output channels, and both subsequently +concatenated.
At groups= in_channels, each input channel is convolved with
+its own set of filters (of size
+\(\left\lfloor\frac{out\_channels}{in\_channels}\right\rfloor\)).
The parameters kernel_size, stride, padding, output_padding
+can either be:
a single int -- in which case the same value is used for the depth, height and width dimensions
a tuple of three ints -- in which case, the first int is used for the depth dimension,
+the second int for the height dimension and the third int for the width dimension
Depending on the size of your kernel, several (of the last)
+columns of the input might be lost, because it is a valid cross-correlation,
+and not a full cross-correlation.
+It is up to the user to add proper padding.
The padding argument effectively adds dilation * (kernel_size - 1) - padding
+amount of zero padding to both sides of the input. This is set so that
+when a nn_conv3d and a nn_conv_transpose3d
+are initialized with the same parameters, they are inverses of each other in
+regard to the input and output shapes. However, when stride > 1,
+nn_conv3d maps multiple input shapes to the same output
+shape. output_padding is provided to resolve this ambiguity by
+effectively increasing the calculated output shape on one side. Note
+that output_padding is only used to find output shape, but does
+not actually add zero-padding to output.
In some circumstances when using the CUDA backend with CuDNN, this operator
+may select a nondeterministic algorithm to increase performance. If this is
+undesirable, you can try to make the operation deterministic (potentially at
+a performance cost) by setting backends_cudnn_deterministic = TRUE.
Input: \((N, C_{in}, D_{in}, H_{in}, W_{in})\)
Output: \((N, C_{out}, D_{out}, H_{out}, W_{out})\) where +$$ + D_{out} = (D_{in} - 1) \times \mbox{stride}[0] - 2 \times \mbox{padding}[0] + \mbox{dilation}[0] +\times (\mbox{kernel\_size}[0] - 1) + \mbox{output\_padding}[0] + 1 +$$ +$$ + H_{out} = (H_{in} - 1) \times \mbox{stride}[1] - 2 \times \mbox{padding}[1] + \mbox{dilation}[1] +\times (\mbox{kernel\_size}[1] - 1) + \mbox{output\_padding}[1] + 1 +$$ +$$ + W_{out} = (W_{in} - 1) \times \mbox{stride}[2] - 2 \times \mbox{padding}[2] + \mbox{dilation}[2] +\times (\mbox{kernel\_size}[2] - 1) + \mbox{output\_padding}[2] + 1 +$$
weight (Tensor): the learnable weights of the module of shape +\((\mbox{in\_channels}, \frac{\mbox{out\_channels}}{\mbox{groups}},\) +\(\mbox{kernel\_size[0]}, \mbox{kernel\_size[1]}, \mbox{kernel\_size[2]})\). +The values of these weights are sampled from +\(\mathcal{U}(-\sqrt{k}, \sqrt{k})\) where +\(k = \frac{groups}{C_{\mbox{out}} * \prod_{i=0}^{2}\mbox{kernel\_size}[i]}\)
bias (Tensor): the learnable bias of the module of shape (out_channels)
+If bias is TRUE, then the values of these weights are
+sampled from \(\mathcal{U}(-\sqrt{k}, \sqrt{k})\) where
+\(k = \frac{groups}{C_{\mbox{out}} * \prod_{i=0}^{2}\mbox{kernel\_size}[i]}\)
+if (torch_is_installed()) { +if (FALSE) { +# With square kernels and equal stride +m <- nn_conv_transpose3d(16, 33, 3, stride=2) +# non-square kernels and unequal stride and with padding +m <- nn_conv_transpose3d(16, 33, c(3, 5, 2), stride=c(2, 1, 1), padding=c(0, 4, 2)) +input <- torch_randn(20, 16, 10, 50, 100) +output <- m(input) +} +}
This criterion combines nn_log_softmax() and nn_nll_loss() in one single class.
+It is useful when training a classification problem with C classes.
nn_cross_entropy_loss(weight = NULL, ignore_index = -100, reduction = "mean")+ +
| weight | +(Tensor, optional): a manual rescaling weight given to each class.
+If given, has to be a Tensor of size |
+
|---|---|
| ignore_index | +(int, optional): Specifies a target value that is ignored
+and does not contribute to the input gradient. When |
+
| reduction | +(string, optional): Specifies the reduction to apply to the output:
+ |
+
If provided, the optional argument weight should be a 1D Tensor
+assigning weight to each of the classes.
This is particularly useful when you have an unbalanced training set.
+The input is expected to contain raw, unnormalized scores for each class.
+input has to be a Tensor of size either \((minibatch, C)\) or
+\((minibatch, C, d_1, d_2, ..., d_K)\)
+with \(K \geq 1\) for the K-dimensional case (described later).
This criterion expects a class index in the range \([0, C-1]\) as the
+target for each value of a 1D tensor of size minibatch; if ignore_index
+is specified, this criterion also accepts this class index (this index may not
+necessarily be in the class range).
The loss can be described as:
+$$
+ \mbox{loss}(x, class) = -\log\left(\frac{\exp(x[class])}{\sum_j \exp(x[j])}\right)
+= -x[class] + \log\left(\sum_j \exp(x[j])\right)
+$$
+or in the case of the weight argument being specified:
+$$
+ \mbox{loss}(x, class) = weight[class] \left(-x[class] + \log\left(\sum_j \exp(x[j])\right)\right)
+$$
The losses are averaged across observations for each minibatch. +Can also be used for higher dimension inputs, such as 2D images, by providing +an input of size \((minibatch, C, d_1, d_2, ..., d_K)\) with \(K \geq 1\), +where \(K\) is the number of dimensions, and a target of appropriate shape +(see below).
+Input: \((N, C)\) where C = number of classes, or
+\((N, C, d_1, d_2, ..., d_K)\) with \(K \geq 1\)
+in the case of K-dimensional loss.
Target: \((N)\) where each value is \(0 \leq \mbox{targets}[i] \leq C-1\), or +\((N, d_1, d_2, ..., d_K)\) with \(K \geq 1\) in the case of +K-dimensional loss.
Output: scalar.
+If reduction is 'none', then the same size as the target:
+\((N)\), or
+\((N, d_1, d_2, ..., d_K)\) with \(K \geq 1\) in the case
+of K-dimensional loss.
+if (torch_is_installed()) { +loss <- nn_cross_entropy_loss() +input <- torch_randn(3, 5, requires_grad=TRUE) +target <- torch_randint(low = 1, high = 5, size = 3, dtype = torch_long()) +output <- loss(input, target) +output$backward() + +}
During training, randomly zeroes some of the elements of the input
+tensor with probability p using samples from a Bernoulli
+distribution. Each channel will be zeroed out independently on every forward
+call.
nn_dropout(p = 0.5, inplace = FALSE)+ +
| p | +probability of an element to be zeroed. Default: 0.5 |
+
|---|---|
| inplace | +If set to |
+
This has proven to be an effective technique for regularization and +preventing the co-adaptation of neurons as described in the paper +Improving neural networks by preventing co-adaptation of feature detectors.
+Furthermore, the outputs are scaled by a factor of \(\frac{1}{1-p}\) during
+training. This means that during evaluation the module simply computes an
+identity function.
Input: \((*)\). Input can be of any shape
Output: \((*)\). Output is of the same shape as input
+if (torch_is_installed()) { +m <- nn_dropout(p = 0.2) +input <- torch_randn(20, 16) +output <- m(input) + +}
Randomly zero out entire channels (a channel is a 2D feature map, +e.g., the \(j\)-th channel of the \(i\)-th sample in the +batched input is a 2D tensor \(\mbox{input}[i, j]\)).
+nn_dropout2d(p = 0.5, inplace = FALSE)+ +
| p | +(float, optional): probability of an element to be zeroed. |
+
|---|---|
| inplace | +(bool, optional): If set to |
+
Each channel will be zeroed out independently on every forward call with
+probability p using samples from a Bernoulli distribution.
+Usually the input comes from nn_conv2d modules.
As described in the paper +Efficient Object Localization Using Convolutional Networks , +if adjacent pixels within feature maps are strongly correlated +(as is normally the case in early convolution layers) then i.i.d. dropout +will not regularize the activations and will otherwise just result +in an effective learning rate decrease. +In this case, nn_dropout2d will help promote independence between +feature maps and should be used instead.
+Input: \((N, C, H, W)\)
Output: \((N, C, H, W)\) (same shape as input)
+if (torch_is_installed()) { +m <- nn_dropout2d(p = 0.2) +input <- torch_randn(20, 16, 32, 32) +output <- m(input) + +}
Randomly zero out entire channels (a channel is a 3D feature map, +e.g., the \(j\)-th channel of the \(i\)-th sample in the +batched input is a 3D tensor \(\mbox{input}[i, j]\)).
+nn_dropout3d(p = 0.5, inplace = FALSE)+ +
| p | +(float, optional): probability of an element to be zeroed. |
+
|---|---|
| inplace | +(bool, optional): If set to |
+
Each channel will be zeroed out independently on every forward call with
+probability p using samples from a Bernoulli distribution.
+Usually the input comes from nn_conv2d modules.
As described in the paper +Efficient Object Localization Using Convolutional Networks , +if adjacent pixels within feature maps are strongly correlated +(as is normally the case in early convolution layers) then i.i.d. dropout +will not regularize the activations and will otherwise just result +in an effective learning rate decrease.
+In this case, nn_dropout3d will help promote independence between +feature maps and should be used instead.
+Input: \((N, C, D, H, W)\)
Output: \((N, C, D, H, W)\) (same shape as input)
+if (torch_is_installed()) { +m <- nn_dropout3d(p = 0.2) +input <- torch_randn(20, 16, 4, 32, 32) +output <- m(input) + +}
Applies the element-wise function:
+nn_elu(alpha = 1, inplace = FALSE)+ +
| alpha | +the \(\alpha\) value for the ELU formulation. Default: 1.0 |
+
|---|---|
| inplace | +can optionally do the operation in-place. Default: |
+
$$ + \mbox{ELU}(x) = \max(0,x) + \min(0, \alpha * (\exp(x) - 1)) +$$
+Input: \((N, *)\) where * means, any number of additional
+dimensions
Output: \((N, *)\), same shape as the input
+
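For illustration, a default-parameter sketch (the input shape is arbitrary):

if (torch_is_installed()) {
  m <- nn_elu()
  input <- torch_randn(2)
  output <- m(input)
}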
A simple lookup table that stores embeddings of a fixed dictionary and size. +This module is often used to store word embeddings and retrieve them using indices. +The input to the module is a list of indices, and the output is the corresponding +word embeddings.
+nn_embedding( + num_embeddings, + embedding_dim, + padding_idx = NULL, + max_norm = NULL, + norm_type = 2, + scale_grad_by_freq = FALSE, + sparse = FALSE, + .weight = NULL +)+ +
| num_embeddings | +(int): size of the dictionary of embeddings |
+
|---|---|
| embedding_dim | +(int): the size of each embedding vector |
+
| padding_idx | +(int, optional): If given, pads the output with the embedding vector at |
+
| max_norm | +(float, optional): If given, each embedding vector with norm larger than |
+
| norm_type | +(float, optional): The p of the p-norm to compute for the |
+
| scale_grad_by_freq | +(boolean, optional): If given, this will scale gradients by the inverse of frequency of
+the words in the mini-batch. Default |
+
| sparse | +(bool, optional): If |
+
| .weight | +(Tensor) embeddings weights (in case you want to set it manually) +See Notes for more details regarding sparse gradients. |
+
Keep in mind that only a limited number of optimizers support
+sparse gradients: currently these are optim.SGD (CUDA and CPU),
+optim.SparseAdam (CUDA and CPU) and optim.Adagrad (CPU)
With padding_idx set, the embedding vector at
+padding_idx is initialized to all zeros. However, note that this
+vector can be modified afterwards, e.g., using a customized
+initialization method, and thus changing the vector used to pad the
+output. The gradient for this vector from nn_embedding
+is always zero.
weight (Tensor): the learnable weights of the module of shape (num_embeddings, embedding_dim) +initialized from \(\mathcal{N}(0, 1)\)
Input: \((*)\), LongTensor of arbitrary shape containing the indices to extract
Output: \((*, H)\), where * is the input shape and \(H=\mbox{embedding\_dim}\)
+if (torch_is_installed()) { +# an Embedding module containing 10 tensors of size 3 +embedding <- nn_embedding(10, 3) +# a batch of 2 samples of 4 indices each +input <- torch_tensor(rbind(c(1,2,4,5),c(4,3,2,9)), dtype = torch_long()) +embedding(input) +# example with padding_idx +embedding <- nn_embedding(10, 3, padding_idx=1) +input <- torch_tensor(matrix(c(1,3,1,6), nrow = 1), dtype = torch_long()) +embedding(input) + +}
R/nn-pooling.R
+ nn_fractional_max_pool2d.RdFractional MaxPooling is described in detail in the paper +Fractional MaxPooling by Ben Graham
+nn_fractional_max_pool2d( + kernel_size, + output_size = NULL, + output_ratio = NULL, + return_indices = FALSE +)+ +
| kernel_size | +the size of the window to take a max over.
+Can be a single number k (for a square kernel of k x k) or a tuple |
+
|---|---|
| output_size | +the target output size of the image of the form |
+
| output_ratio | +If one wants to have an output size as a ratio of the input size, this option can be given. +This has to be a number or tuple in the range (0, 1) |
+
| return_indices | +if |
+
The max-pooling operation is applied in \(kH \times kW\) regions by a stochastic +step size determined by the target output size. +The number of output features is equal to the number of input planes.
+if (torch_is_installed()) { +# pool of square window of size=3, and target output size 13x12 +m <- nn_fractional_max_pool2d(3, output_size=c(13, 12)) +# pool of square window and target output size being half of input image size +m <- nn_fractional_max_pool2d(3, output_ratio=c(0.5, 0.5)) +input <- torch_randn(20, 16, 50, 32) +output <- m(input) + +}
R/nn-pooling.R
+ nn_fractional_max_pool3d.RdFractional MaxPooling is described in detail in the paper +Fractional MaxPooling by Ben Graham
+nn_fractional_max_pool3d( + kernel_size, + output_size = NULL, + output_ratio = NULL, + return_indices = FALSE +)+ +
| kernel_size | +the size of the window to take a max over.
+Can be a single number k (for a square kernel of k x k x k) or a tuple |
+
|---|---|
| output_size | +the target output size of the image of the form |
+
| output_ratio | +If one wants to have an output size as a ratio of the input size, this option can be given. +This has to be a number or tuple in the range (0, 1) |
+
| return_indices | +if |
+
The max-pooling operation is applied in \(kT \times kH \times kW\) regions by a stochastic +step size determined by the target output size. +The number of output features is equal to the number of input planes.
+ ++if (torch_is_installed()) { +# pool of cubic window of size=3, and target output size 13x12x11 +m = nn_fractional_max_pool3d(3, output_size=c(13, 12, 11)) +# pool of cubic window and target output size being half of input size +m = nn_fractional_max_pool3d(3, output_ratio=c(0.5, 0.5, 0.5)) +input = torch_randn(20, 16, 50, 32, 16) +output = m(input) + +}
Applies the Gaussian Error Linear Units function: +$$\mbox{GELU}(x) = x * \Phi(x)$$
+nn_gelu()
+
+
+ where \(\Phi(x)\) is the cumulative distribution function of the Gaussian distribution.
+Input: \((N, *)\) where * means, any number of additional
+dimensions
Output: \((N, *)\), same shape as the input
+
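A minimal usage sketch (illustrative only; exact outputs depend on \(\Phi\)):
if (torch_is_installed()) {
  m <- nn_gelu()
  x <- torch_tensor(c(-2, -0.5, 0, 0.5, 2))
  m(x)  # approximately -0.05, -0.15, 0, 0.35, 1.95
}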
Applies the gated linear unit function +\({GLU}(a, b)= a \otimes \sigma(b)\) where \(a\) is the first half +of the input matrices and \(b\) is the second half.
+nn_glu(dim = -1)+ +
| dim | +(int): the dimension on which to split the input. Default: -1 |
+
|---|
Input: \((\ast_1, N, \ast_2)\) where * means, any number of additional
+dimensions
Output: \((\ast_1, M, \ast_2)\) where \(M=N/2\)
+
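A minimal usage sketch: the last dimension is split in half, so a 4 x 6 input yields a 4 x 3 output.
if (torch_is_installed()) {
  m <- nn_glu()
  input <- torch_randn(4, 6)
  output <- m(input)
  output$size()  # 4 3
}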
Applies the hard shrinkage function element-wise:
+nn_hardshrink(lambd = 0.5)+ +
| lambd | +the \(\lambda\) value for the Hardshrink formulation. Default: 0.5 |
+
|---|
$$ + \mbox{HardShrink}(x) = + \left\{ \begin{array}{ll} +x, & \mbox{ if } x > \lambda \\ +x, & \mbox{ if } x < -\lambda \\ +0, & \mbox{ otherwise } +\end{array} +\right. +$$
+Input: \((N, *)\) where * means, any number of additional
+dimensions
Output: \((N, *)\), same shape as the input
+
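A minimal usage sketch: with the default \(\lambda = 0.5\), values inside \([-0.5, 0.5]\) are zeroed and the rest pass through unchanged.
if (torch_is_installed()) {
  m <- nn_hardshrink(lambd = 0.5)
  x <- torch_tensor(c(-1, -0.25, 0.25, 1))
  m(x)  # -1, 0, 0, 1
}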
Applies the element-wise function:
+nn_hardsigmoid()
+
+
+ $$ +\mbox{Hardsigmoid}(x) = \left\{ \begin{array}{ll} + 0 & \mbox{if~} x \le -3, \\ + 1 & \mbox{if~} x \ge +3, \\ + x / 6 + 1 / 2 & \mbox{otherwise} +\end{array} +\right. +$$
+Input: \((N, *)\) where * means, any number of additional
+dimensions
Output: \((N, *)\), same shape as the input
+if (torch_is_installed()) { +m <- nn_hardsigmoid() +input <- torch_randn(2) +output <- m(input) + +}
Applies the hardswish function, element-wise, as described in the paper: +Searching for MobileNetV3
+nn_hardswish()
+
+
+ $$ \mbox{Hardswish}(x) = \left\{ + \begin{array}{ll} + 0 & \mbox{if } x \le -3, \\ + x & \mbox{if } x \ge +3, \\ + x \cdot (x + 3)/6 & \mbox{otherwise} + \end{array} + \right. $$
+Input: \((N, *)\) where * means, any number of additional
+dimensions
Output: \((N, *)\), same shape as the input
+if (torch_is_installed()) { +if (FALSE) { +m <- nn_hardswish() +input <- torch_randn(2) +output <- m(input) +} + +}
Applies the HardTanh function element-wise +HardTanh is defined as:
+nn_hardtanh(min_val = -1, max_val = 1, inplace = FALSE)+ +
| min_val | +minimum value of the linear region range. Default: -1 |
+
|---|---|
| max_val | +maximum value of the linear region range. Default: 1 |
+
| inplace | +can optionally do the operation in-place. Default: |
+
$$ +\mbox{HardTanh}(x) = \left\{ \begin{array}{ll} + 1 & \mbox{ if } x > 1 \\ + -1 & \mbox{ if } x < -1 \\ + x & \mbox{ otherwise } \\ +\end{array} +\right. +$$
+The range of the linear region \([-1, 1]\) can be adjusted using
+min_val and max_val.
Input: \((N, *)\) where * means, any number of additional
+dimensions
Output: \((N, *)\), same shape as the input
+if (torch_is_installed()) { +m <- nn_hardtanh(-2, 2) +input <- torch_randn(2) +output <- m(input) + +}
A placeholder identity operator that is argument-insensitive.
+nn_identity(...)+ +
| ... | +any arguments (unused) |
+
|---|
+if (torch_is_installed()) { +m <- nn_identity(54, unused_argument1 = 0.1, unused_argument2 = FALSE) +input <- torch_randn(128, 20) +output <- m(input) +print(output$size()) + +}
Fills the 3, 4, 5-dimensional input Tensor with the Dirac
+delta function. Preserves the identity of the inputs in Convolutional
+layers, where as many input channels are preserved as possible. In case
+of groups>1, each group of channels preserves identity.
nn_init_dirac_(tensor, groups = 1)+ +
| tensor | +a 3, 4, 5-dimensional |
+
|---|---|
| groups | +(optional) number of groups in the conv layer (default: 1) |
+
+
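A minimal usage sketch for a 4-dimensional (conv2d-style) weight tensor; the shapes are illustrative only.
if (torch_is_installed()) {
  w <- torch_empty(3, 16, 5, 5)  # (out_channels, in_channels, kH, kW)
  nn_init_dirac_(w)
}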
Fills the input Tensor with values according to the method
+described in Delving deep into rectifiers: Surpassing human-level performance on ImageNet classification - He, K. et al. (2015), using a
+normal distribution.
nn_init_kaiming_normal_( + tensor, + a = 0, + mode = "fan_in", + nonlinearity = "leaky_relu" +)+ +
| tensor | +an n-dimensional |
+
|---|---|
| a | +the negative slope of the rectifier used after this layer (only used
+with |
+
| mode | +either 'fan_in' (default) or 'fan_out'. Choosing 'fan_in' preserves +the magnitude of the variance of the weights in the forward pass. Choosing +'fan_out' preserves the magnitudes in the backwards pass. |
+
| nonlinearity | +the non-linear function. It is recommended to use it only with 'relu' +or 'leaky_relu' (default). |
+
+if (torch_is_installed()) { +w <- torch_empty(3, 5) +nn_init_kaiming_normal_(w, mode = "fan_in", nonlinearity = "leaky_relu") + +}
Fills the input Tensor with values according to the method
+described in Delving deep into rectifiers: Surpassing human-level performance on ImageNet classification - He, K. et al. (2015), using a
+uniform distribution.
nn_init_kaiming_uniform_( + tensor, + a = 0, + mode = "fan_in", + nonlinearity = "leaky_relu" +)+ +
| tensor | +an n-dimensional |
+
|---|---|
| a | +the negative slope of the rectifier used after this layer (only used
+with |
+
| mode | +either 'fan_in' (default) or 'fan_out'. Choosing 'fan_in' preserves +the magnitude of the variance of the weights in the forward pass. Choosing +'fan_out' preserves the magnitudes in the backwards pass. |
+
| nonlinearity | +the non-linear function. It is recommended to use it only with 'relu' +or 'leaky_relu' (default). |
+
+if (torch_is_installed()) { +w <- torch_empty(3, 5) +nn_init_kaiming_uniform_(w, mode = "fan_in", nonlinearity = "leaky_relu") + +}
Fills the input Tensor with values drawn from the normal distribution
+nn_init_normal_(tensor, mean = 0, std = 1)+ +
| tensor | +an n-dimensional Tensor |
+
|---|---|
| mean | +the mean of the normal distribution |
+
| std | +the standard deviation of the normal distribution |
+
+
Fills the input Tensor with a (semi) orthogonal matrix, as
+described in Exact solutions to the nonlinear dynamics of learning in deep linear neural networks - Saxe, A. et al. (2013). The input tensor must have
+at least 2 dimensions, and for tensors with more than 2 dimensions the
+trailing dimensions are flattened.
nn_init_orthogonal_(tensor, gain = 1)+ +
| tensor | +an n-dimensional |
+
|---|---|
| gain | +optional scaling factor |
+
+
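A minimal usage sketch: after initialization the columns of a tall matrix are orthonormal, so \(W^T W\) is approximately the identity.
if (torch_is_installed()) {
  w <- torch_empty(5, 3)
  nn_init_orthogonal_(w)
  torch_matmul(w$t(), w)  # approximately a 3 x 3 identity matrix
}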
Fills the 2D input Tensor as a sparse matrix, where the
+non-zero elements will be drawn from the normal distribution
+as described in Deep learning via Hessian-free optimization - Martens, J. (2010).
nn_init_sparse_(tensor, sparsity, std = 0.01)+ +
| tensor | +an n-dimensional |
+
|---|---|
| sparsity | +The fraction of elements in each column to be set to zero |
+
| std | +the standard deviation of the normal distribution used to generate +the non-zero values |
+
+if (torch_is_installed()) { +if (FALSE) { +w <- torch_empty(3, 5) +nn_init_sparse_(w, sparsity = 0.1) +} +}
Fills the input Tensor with values drawn from a truncated +normal distribution.
+nn_init_trunc_normal_(tensor, mean = 0, std = 1, a = -2, b = 2)+ +
| tensor | +an n-dimensional Tensor |
+
|---|---|
| mean | +the mean of the normal distribution |
+
| std | +the standard deviation of the normal distribution |
+
| a | +the minimum cutoff value |
+
| b | +the maximum cutoff value |
+
+
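A minimal usage sketch: all drawn values fall inside the cutoffs \([a, b]\).
if (torch_is_installed()) {
  w <- torch_empty(3, 5)
  nn_init_trunc_normal_(w, mean = 0, std = 1, a = -2, b = 2)
  w$min(); w$max()  # both within [-2, 2]
}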
Fills the input Tensor with values drawn from the uniform distribution
+nn_init_uniform_(tensor, a = 0, b = 1)+ +
| tensor | +an n-dimensional Tensor |
+
|---|---|
| a | +the lower bound of the uniform distribution |
+
| b | +the upper bound of the uniform distribution |
+
+
Fills the input Tensor with values according to the method
+described in Understanding the difficulty of training deep feedforward neural networks - Glorot, X. & Bengio, Y. (2010), using a normal
+distribution.
nn_init_xavier_normal_(tensor, gain = 1)+ +
| tensor | +an n-dimensional |
+
|---|---|
| gain | +an optional scaling factor |
+
+
Fills the input Tensor with values according to the method
+described in Understanding the difficulty of training deep feedforward neural networks - Glorot, X. & Bengio, Y. (2010), using a uniform
+distribution.
nn_init_xavier_uniform_(tensor, gain = 1)+ +
| tensor | +an n-dimensional |
+
|---|---|
| gain | +an optional scaling factor |
+
+
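A minimal usage sketch, analogous to the kaiming examples above:
if (torch_is_installed()) {
  w <- torch_empty(3, 5)
  nn_init_xavier_uniform_(w, gain = 1)
}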
Applies the element-wise function:
+nn_leaky_relu(negative_slope = 0.01, inplace = FALSE)+ +
| negative_slope | +Controls the angle of the negative slope. Default: 1e-2 |
+
|---|---|
| inplace | +can optionally do the operation in-place. Default: |
+
$$ + \mbox{LeakyReLU}(x) = \max(0, x) + \mbox{negative\_slope} * \min(0, x) +$$ +or
+$$ + \mbox{LeakyRELU}(x) = + \left\{ \begin{array}{ll} +x, & \mbox{ if } x \geq 0 \\ +\mbox{negative\_slope} \times x, & \mbox{ otherwise } +\end{array} +\right. +$$
+Input: \((N, *)\) where * means, any number of additional
+dimensions
Output: \((N, *)\), same shape as the input
+if (torch_is_installed()) { +m <- nn_leaky_relu(0.1) +input <- torch_randn(2) +output <- m(input) + +}
Applies a linear transformation to the incoming data: y = xA^T + b
nn_linear(in_features, out_features, bias = TRUE)+ +
| in_features | +size of each input sample |
+
|---|---|
| out_features | +size of each output sample |
+
| bias | +If set to |
+
Input: (N, *, H_in) where * means any number of
+additional dimensions and H_in = in_features.
Output: (N, *, H_out) where all but the last dimension
+are the same shape as the input and H_out = out_features.
weight: the learnable weights of the module of shape
+(out_features, in_features). The values are
+initialized from \(U(-\sqrt{k}, \sqrt{k})\)s, where
+\(k = \frac{1}{\mbox{in\_features}}\)
bias: the learnable bias of the module of shape \((\mbox{out\_features})\).
+If bias is TRUE, the values are initialized from
+\(\mathcal{U}(-\sqrt{k}, \sqrt{k})\) where
+\(k = \frac{1}{\mbox{in\_features}}\)
+if (torch_is_installed()) { +m <- nn_linear(20, 30) +input <- torch_randn(128, 20) +output <- m(input) +print(output$size()) + +}
Applies the element-wise function: +$$ + \mbox{LogSigmoid}(x) = \log\left(\frac{ 1 }{ 1 + \exp(-x)}\right) + $$
+nn_log_sigmoid()
+
+
+ Input: \((N, *)\) where * means, any number of additional
+dimensions
Output: \((N, *)\), same shape as the input
+if (torch_is_installed()) { +m <- nn_log_sigmoid() +input <- torch_randn(2) +output <- m(input) + +}
Applies the \(\log(\mbox{Softmax}(x))\) function to an n-dimensional +input Tensor. The LogSoftmax formulation can be simplified as:
+nn_log_softmax(dim)+ +
| dim | +(int): A dimension along which LogSoftmax will be computed. |
+
|---|
a Tensor of the same dimension and shape as the input with +values in the range [-inf, 0)
+$$ + \mbox{LogSoftmax}(x_{i}) = \log\left(\frac{\exp(x_i) }{ \sum_j \exp(x_j)} \right) +$$
+Input: \((*)\) where * means, any number of additional
+dimensions
Output: \((*)\), same shape as the input
+if (torch_is_installed()) { +m <- nn_log_softmax(1) +input <- torch_randn(2, 3) +output <- m(input) + +}
R/nn-pooling.R
+ nn_lp_pool1d.Rd
On each window, the function computed is:
+$$ + f(X) = \sqrt[p]{\sum_{x \in X} x^{p}} +$$
+nn_lp_pool1d(norm_type, kernel_size, stride = NULL, ceil_mode = FALSE)+ +
| norm_type | +if inf, one gets max pooling; if 1, one gets sum pooling +(proportional to average pooling) |
+
|---|---|
| kernel_size | +a single int, the size of the window |
+
| stride | +a single int, the stride of the window. Default value is |
+
| ceil_mode | +when TRUE, will use |
+
At p = \(\infty\), one gets Max Pooling
At p = 1, one gets Sum Pooling (which is proportional to Average Pooling)
If the sum to the power of p is zero, the gradient of this function is
+not defined. This implementation will set the gradient to zero in this case.
Input: \((N, C, L_{in})\)
Output: \((N, C, L_{out})\), where
$$ + L_{out} = \left\lfloor\frac{L_{in} - \text{kernel\_size}}{\text{stride}} + 1\right\rfloor +$$
+ ++if (torch_is_installed()) { +# power-2 pool of window of length 3, with stride 2. +m <- nn_lp_pool1d(2, 3, stride=2) +input <- torch_randn(20, 16, 50) +output <- m(input) + +}
R/nn-pooling.R
+ nn_lp_pool2d.Rd
On each window, the function computed is:
+$$ + f(X) = \sqrt[p]{\sum_{x \in X} x^{p}} +$$
+nn_lp_pool2d(norm_type, kernel_size, stride = NULL, ceil_mode = FALSE)+ +
| norm_type | +if inf, one gets max pooling; if 1, one gets sum pooling +(proportional to average pooling) |
+
|---|---|
| kernel_size | +the size of the window |
+
| stride | +the stride of the window. Default value is |
+
| ceil_mode | +when TRUE, will use |
+
At p = \(\infty\), one gets Max Pooling
At p = 1, one gets Sum Pooling (which is proportional to average pooling)
The parameters kernel_size, stride can either be:
a single int -- in which case the same value is used for the height and width dimension
a tuple of two ints -- in which case, the first int is used for the height dimension,
+and the second int for the width dimension
If the sum to the power of p is zero, the gradient of this function is
+not defined. This implementation will set the gradient to zero in this case.
Input: \((N, C, H_{in}, W_{in})\)
Output: \((N, C, H_{out}, W_{out})\), where
$$ + H_{out} = \left\lfloor\frac{H_{in} - \text{kernel\_size}[0]}{\text{stride}[0]} + 1\right\rfloor +$$ +$$ + W_{out} = \left\lfloor\frac{W_{in} - \text{kernel\_size}[1]}{\text{stride}[1]} + 1\right\rfloor +$$
+ ++if (torch_is_installed()) { + +# power-2 pool of square window of size=3, stride=2 +m <- nn_lp_pool2d(2, 3, stride=2) +# pool of non-square window of power 1.2 +m <- nn_lp_pool2d(1.2, c(3, 2), stride=c(2, 1)) +input <- torch_randn(20, 16, 50, 32) +output <- m(input) + +}
Applies a 1D max pooling over an input signal composed of several input +planes.
+nn_max_pool1d( + kernel_size, + stride = NULL, + padding = 0, + dilation = 1, + return_indices = FALSE, + ceil_mode = FALSE +)+ +
| kernel_size | +the size of the window to take a max over |
+
|---|---|
| stride | +the stride of the window. Default value is |
+
| padding | +implicit zero padding to be added on both sides |
+
| dilation | +a parameter that controls the stride of elements in the window |
+
| return_indices | +if |
+
| ceil_mode | +when |
+
In the simplest case, the output value of the layer with input size \((N, C, L)\) +and output \((N, C, L_{out})\) can be precisely described as:
+$$ + out(N_i, C_j, k) = \max_{m=0, \ldots, \mbox{kernel\_size} - 1} +input(N_i, C_j, stride \times k + m) +$$
+If padding is non-zero, then the input is implicitly zero-padded on both sides
+for padding number of points. dilation controls the spacing between the kernel points.
+It is harder to describe, but this link
+has a nice visualization of what dilation does.
Input: \((N, C, L_{in})\)
Output: \((N, C, L_{out})\), where
$$ + L_{out} = \left\lfloor \frac{L_{in} + 2 \times \mbox{padding} - \mbox{dilation} + \times (\mbox{kernel\_size} - 1) - 1}{\mbox{stride}} + 1\right\rfloor +$$
+ ++if (torch_is_installed()) { +# pool of size=3, stride=2 +m <- nn_max_pool1d(3, stride=2) +input <- torch_randn(20, 16, 50) +output <- m(input) + +}
Applies a 2D max pooling over an input signal composed of several input +planes.
+nn_max_pool2d( + kernel_size, + stride = NULL, + padding = 0, + dilation = 1, + return_indices = FALSE, + ceil_mode = FALSE +)+ +
| kernel_size | +the size of the window to take a max over |
+
|---|---|
| stride | +the stride of the window. Default value is |
+
| padding | +implicit zero padding to be added on both sides |
+
| dilation | +a parameter that controls the stride of elements in the window |
+
| return_indices | +if |
+
| ceil_mode | +when |
+
In the simplest case, the output value of the layer with input size \((N, C, H, W)\),
+output \((N, C, H_{out}, W_{out})\) and kernel_size \((kH, kW)\)
+can be precisely described as:
$$ + \begin{array}{ll} +out(N_i, C_j, h, w) ={} & \max_{m=0, \ldots, kH-1} \max_{n=0, \ldots, kW-1} \\ +& \mbox{input}(N_i, C_j, \mbox{stride[0]} \times h + m, + \mbox{stride[1]} \times w + n) +\end{array} +$$
+If padding is non-zero, then the input is implicitly zero-padded on both sides
+for padding number of points. dilation controls the spacing between the kernel points.
+It is harder to describe, but this link has a nice visualization of what dilation does.
The parameters kernel_size, stride, padding, dilation can either be:
a single int -- in which case the same value is used for the height and width dimension
a tuple of two ints -- in which case, the first int is used for the height dimension,
+and the second int for the width dimension
Input: \((N, C, H_{in}, W_{in})\)
Output: \((N, C, H_{out}, W_{out})\), where
$$ + H_{out} = \left\lfloor\frac{H_{in} + 2 * \mbox{padding[0]} - \mbox{dilation[0]} + \times (\mbox{kernel\_size[0]} - 1) - 1}{\mbox{stride[0]}} + 1\right\rfloor +$$
+$$ + W_{out} = \left\lfloor\frac{W_{in} + 2 * \mbox{padding[1]} - \mbox{dilation[1]} + \times (\mbox{kernel\_size[1]} - 1) - 1}{\mbox{stride[1]}} + 1\right\rfloor +$$
+ ++if (torch_is_installed()) { +# pool of square window of size=3, stride=2 +m <- nn_max_pool2d(3, stride=2) +# pool of non-square window +m <- nn_max_pool2d(c(3, 2), stride=c(2, 1)) +input <- torch_randn(20, 16, 50, 32) +output <- m(input) + +}
R/nn-pooling.R
+ nn_max_pool3d.Rd
In the simplest case, the output value of the layer with input size \((N, C, D, H, W)\),
+output \((N, C, D_{out}, H_{out}, W_{out})\) and kernel_size \((kD, kH, kW)\)
+can be precisely described as:
nn_max_pool3d( + kernel_size, + stride = NULL, + padding = 0, + dilation = 1, + return_indices = FALSE, + ceil_mode = FALSE +)+ +
| kernel_size | +the size of the window to take a max over |
+
|---|---|
| stride | +the stride of the window. Default value is |
+
| padding | +implicit zero padding to be added on all three sides |
+
| dilation | +a parameter that controls the stride of elements in the window |
+
| return_indices | +if |
+
| ceil_mode | +when TRUE, will use |
+
$$ + \begin{aligned} +\text{out}(N_i, C_j, d, h, w) ={} & \max_{k=0, \ldots, kD-1} \max_{m=0, \ldots, kH-1} \max_{n=0, \ldots, kW-1} \\ +& \text{input}(N_i, C_j, \text{stride[0]} \times d + k, + \text{stride[1]} \times h + m, \text{stride[2]} \times w + n) +\end{aligned} +$$
+If padding is non-zero, then the input is implicitly zero-padded on both sides
+for padding number of points. dilation controls the spacing between the kernel points.
+It is harder to describe, but this link has a nice visualization of what dilation does.
+The parameters kernel_size, stride, padding, dilation can either be:
a single int -- in which case the same value is used for the depth, height and width dimension
a tuple of three ints -- in which case, the first int is used for the depth dimension,
+the second int for the height dimension and the third int for the width dimension
Input: \((N, C, D_{in}, H_{in}, W_{in})\)
Output: \((N, C, D_{out}, H_{out}, W_{out})\), where +$$ + D_{out} = \left\lfloor\frac{D_{in} + 2 \times \text{padding}[0] - \text{dilation}[0] \times + (\text{kernel\_size}[0] - 1) - 1}{\text{stride}[0]} + 1\right\rfloor +$$
$$ + H_{out} = \left\lfloor\frac{H_{in} + 2 \times \text{padding}[1] - \text{dilation}[1] \times + (\text{kernel\_size}[1] - 1) - 1}{\text{stride}[1]} + 1\right\rfloor +$$
+$$ + W_{out} = \left\lfloor\frac{W_{in} + 2 \times \text{padding}[2] - \text{dilation}[2] \times + (\text{kernel\_size}[2] - 1) - 1}{\text{stride}[2]} + 1\right\rfloor +$$
+ ++if (torch_is_installed()) { +# pool of square window of size=3, stride=2 +m <- nn_max_pool3d(3, stride=2) +# pool of non-square window +m <- nn_max_pool3d(c(3, 2, 2), stride=c(2, 1, 2)) +input <- torch_randn(20, 16, 50,44, 31) +output <- m(input) + +}
MaxPool1d is not fully invertible, since the non-maximal values are lost.
+MaxUnpool1d takes in as input the output of MaxPool1d
+including the indices of the maximal values and computes a partial inverse
+in which all non-maximal values are set to zero.
nn_max_unpool1d(kernel_size, stride = NULL, padding = 0)+ +
| kernel_size | +(int or tuple): Size of the max pooling window. |
+
|---|---|
| stride | +(int or tuple): Stride of the max pooling window.
+It is set to |
+
| padding | +(int or tuple): Padding that was added to the input |
+
MaxPool1d can map several input sizes to the same output
+sizes. Hence, the inversion process can get ambiguous.
+To accommodate this, you can provide the needed output size
+as an additional argument output_size in the forward call.
+See the Inputs and Example below.
input: the input Tensor to invert
indices: the indices given out by nn_max_pool1d()
output_size (optional): the targeted output size
Input: \((N, C, H_{in})\)
Output: \((N, C, H_{out})\), where
+$$
+ H_{out} = (H_{in} - 1) \times \text{stride}[0] - 2 \times \text{padding}[0] + \text{kernel\_size}[0]
+$$
+or as given by output_size in the call operator
+if (torch_is_installed()) { +pool <- nn_max_pool1d(2, stride=2, return_indices=TRUE) +unpool <- nn_max_unpool1d(2, stride=2) + +input <- torch_tensor(array(1:8/1, dim = c(1,1,8))) +out <- pool(input) +unpool(out[[1]], out[[2]]) + +# Example showcasing the use of output_size +input <- torch_tensor(array(1:8/1, dim = c(1,1,8))) +out <- pool(input) +unpool(out[[1]], out[[2]], output_size=input$size()) +unpool(out[[1]], out[[2]]) + +}
MaxPool2d is not fully invertible, since the non-maximal values are lost.
+MaxUnpool2d takes in as input the output of MaxPool2d
+including the indices of the maximal values and computes a partial inverse
+in which all non-maximal values are set to zero.
nn_max_unpool2d(kernel_size, stride = NULL, padding = 0)+ +
| kernel_size | +(int or tuple): Size of the max pooling window. |
+
|---|---|
| stride | +(int or tuple): Stride of the max pooling window.
+It is set to |
+
| padding | +(int or tuple): Padding that was added to the input |
+
MaxPool2d can map several input sizes to the same output
+sizes. Hence, the inversion process can get ambiguous.
+To accommodate this, you can provide the needed output size
+as an additional argument output_size in the forward call.
+See the Inputs and Example below.
input: the input Tensor to invert
indices: the indices given out by nn_max_pool2d()
output_size (optional): the targeted output size
Input: \((N, C, H_{in}, W_{in})\)
Output: \((N, C, H_{out}, W_{out})\), where
+$$
+ H_{out} = (H_{in} - 1) \times \text{stride[0]} - 2 \times \text{padding[0]} + \text{kernel\_size[0]}
+$$
+$$
+ W_{out} = (W_{in} - 1) \times \text{stride[1]} - 2 \times \text{padding[1]} + \text{kernel\_size[1]}
+$$
+or as given by output_size in the call operator
+if (torch_is_installed()) { + +pool <- nn_max_pool2d(2, stride=2, return_indices=TRUE) +unpool <- nn_max_unpool2d(2, stride=2) +input <- torch_randn(1,1,4,4) +out <- pool(input) +unpool(out[[1]], out[[2]]) + +# specify a different output size than input size +unpool(out[[1]], out[[2]], output_size=c(1, 1, 5, 5)) + +}
MaxPool3d is not fully invertible, since the non-maximal values are lost.
+MaxUnpool3d takes in as input the output of MaxPool3d
+including the indices of the maximal values and computes a partial inverse
+in which all non-maximal values are set to zero.
nn_max_unpool3d(kernel_size, stride = NULL, padding = 0)+ +
| kernel_size | +(int or tuple): Size of the max pooling window. |
+
|---|---|
| stride | +(int or tuple): Stride of the max pooling window.
+It is set to |
+
| padding | +(int or tuple): Padding that was added to the input |
+
MaxPool3d can map several input sizes to the same output
+sizes. Hence, the inversion process can get ambiguous.
+To accommodate this, you can provide the needed output size
+as an additional argument output_size in the forward call.
+See the Inputs section below.
input: the input Tensor to invert
indices: the indices given out by nn_max_pool3d()
output_size (optional): the targeted output size
Input: \((N, C, D_{in}, H_{in}, W_{in})\)
Output: \((N, C, D_{out}, H_{out}, W_{out})\), where
$$ + D_{out} = (D_{in} - 1) \times \text{stride[0]} - 2 \times \text{padding[0]} + \text{kernel\_size[0]} +$$ +$$ + H_{out} = (H_{in} - 1) \times \text{stride[1]} - 2 \times \text{padding[1]} + \text{kernel\_size[1]} +$$ +$$ + W_{out} = (W_{in} - 1) \times \text{stride[2]} - 2 \times \text{padding[2]} + \text{kernel\_size[2]} +$$
+or as given by output_size in the call operator
+if (torch_is_installed()) { + +# pool of square window of size=3, stride=2 +pool <- nn_max_pool3d(3, stride=2, return_indices=TRUE) +unpool <- nn_max_unpool3d(3, stride=2) +out <- pool(torch_randn(20, 16, 51, 33, 15)) +unpooled_output <- unpool(out[[1]], out[[2]]) +unpooled_output$size() + +}
Your models should also subclass this class.
+nn_module(classname = NULL, inherit = nn_Module, ...)+ +
| classname | +an optional name for the module |
+
|---|---|
| inherit | +an optional module to inherit from |
+
| ... | +methods implementation |
+
Modules can also contain other Modules, allowing you to nest them in a tree +structure. You can assign the submodules as regular attributes.
+ ++if (torch_is_installed()) { +model <- nn_module( + initialize = function() { + self$conv1 <- nn_conv2d(1, 20, 5) + self$conv2 <- nn_conv2d(20, 20, 5) + }, + forward = function(input) { + input <- self$conv1(input) + input <- nnf_relu(input) + input <- self$conv2(input) + input <- nnf_relu(input) + input + } +) + +}
nn_module_list can be indexed like a regular R list, but
+modules it contains are properly registered, and will be visible by all
+nn_module methods.
nn_module_list(modules = list())+ +
| modules | +a list of modules to add |
+
|---|
+if (torch_is_installed()) { + +my_module <- nn_module( + initialize = function() { + self$linears <- nn_module_list(lapply(1:10, function(x) nn_linear(10, 10))) + }, + forward = function(x) { + for (i in 1:length(self$linears)) + x <- self$linears[[i]](x) + x + } +) + +}
Allows the model to jointly attend to information +from different representation subspaces. +See reference: Attention Is All You Need
+nn_multihead_attention( + embed_dim, + num_heads, + dropout = 0, + bias = TRUE, + add_bias_kv = FALSE, + add_zero_attn = FALSE, + kdim = NULL, + vdim = NULL +)+ +
| embed_dim | +total dimension of the model. |
+
|---|---|
| num_heads | +parallel attention heads. |
+
| dropout | +a Dropout layer on attn_output_weights. Default: 0.0. |
+
| bias | +add bias as module parameter. Default: True. |
+
| add_bias_kv | +add bias to the key and value sequences at dim=0. |
+
| add_zero_attn | +add a new batch of zeros to the key and +value sequences at dim=1. |
+
| kdim | +total number of features in key. Default: |
+
| vdim | +total number of features in value. Default: |
+
$$ + \mbox{MultiHead}(Q, K, V) = \mbox{Concat}(head_1,\dots,head_h)W^O, +\quad \mbox{where } head_i = \mbox{Attention}(QW_i^Q, KW_i^K, VW_i^V) +$$
+Inputs:
query: \((L, N, E)\) where L is the target sequence length, N is the batch size, E is +the embedding dimension.
key: \((S, N, E)\), where S is the source sequence length, N is the batch size, E is +the embedding dimension.
value: \((S, N, E)\) where S is the source sequence length, N is the batch size, E is +the embedding dimension.
key_padding_mask: \((N, S)\) where N is the batch size, S is the source sequence length.
+If a ByteTensor is provided, the non-zero positions will be ignored while the
+zero positions will be unchanged. If a BoolTensor is provided, the positions with the
+value of True will be ignored while the positions with the value of False will be unchanged.
attn_mask: 2D mask \((L, S)\) where L is the target sequence length, S is the source sequence length.
+3D mask \((N*num_heads, L, S)\) where N is the batch size, L is the target sequence length,
+S is the source sequence length. attn_mask ensures that position i is allowed to attend the unmasked
+positions. If a ByteTensor is provided, the non-zero positions are not allowed to attend
+while the zero positions will be unchanged. If a BoolTensor is provided, positions with True
+are not allowed to attend while positions with False will be unchanged. If a FloatTensor
+is provided, it will be added to the attention weight.
Outputs:
attn_output: \((L, N, E)\) where L is the target sequence length, N is the batch size, +E is the embedding dimension.
attn_output_weights: \((N, L, S)\) where N is the batch size, +L is the target sequence length, S is the source sequence length.
+if (torch_is_installed()) { +if (FALSE) { +multihead_attn = nn_multihead_attention(embed_dim, num_heads) +out <- multihead_attn(query, key, value) +attn_output <- out[[1]] +attn_output_weights <- out[[2]] +} + +}
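A more concrete sketch with the shapes from the Inputs/Outputs sections spelled out (the dimensions used here are arbitrary illustrative values):
if (torch_is_installed()) {
  embed_dim <- 8; num_heads <- 2
  multihead_attn <- nn_multihead_attention(embed_dim, num_heads)
  query <- torch_randn(3, 2, embed_dim)  # (L, N, E)
  key   <- torch_randn(5, 2, embed_dim)  # (S, N, E)
  value <- torch_randn(5, 2, embed_dim)  # (S, N, E)
  out <- multihead_attn(query, key, value)
  attn_output <- out[[1]]          # (L, N, E)
  attn_output_weights <- out[[2]]  # (N, L, S)
}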
Applies the element-wise function: +$$ + \mbox{PReLU}(x) = \max(0,x) + a * \min(0,x) +$$ +or +$$ + \mbox{PReLU}(x) = + \left\{ \begin{array}{ll} +x, & \mbox{ if } x \geq 0 \\ +ax, & \mbox{ otherwise } +\end{array} +\right. +$$
+nn_prelu(num_parameters = 1, init = 0.25)+ +
| num_parameters | +(int): number of \(a\) to learn. +Although it takes an int as input, only two values are legitimate: +1, or the number of channels of the input. Default: 1 |
+
|---|---|
| init | +(float): the initial value of \(a\). Default: 0.25 |
+
Here \(a\) is a learnable parameter. When called without arguments, nn_prelu() uses a single
+parameter \(a\) across all input channels. If called with nn_prelu(nChannels),
+a separate \(a\) is used for each input channel.
weight decay should not be used when learning \(a\) for good performance.
+Channel dim is the 2nd dim of input. When input has dims < 2, then there is +no channel dim and the number of channels = 1.
+Input: \((N, *)\) where * means, any number of additional
+dimensions
Output: \((N, *)\), same shape as the input
weight (Tensor): the learnable weights of shape (num_parameters).
+
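A minimal usage sketch: the slope \(a\) is learnable and shows up in the module's parameters.
if (torch_is_installed()) {
  m <- nn_prelu()
  input <- torch_randn(4)
  output <- m(input)
  m$parameters  # contains the weight `a`, initialized to 0.25
}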
Applies the rectified linear unit function element-wise +$$\mbox{ReLU}(x) = (x)^+ = \max(0, x)$$
+nn_relu(inplace = FALSE)+ +
| inplace | +can optionally do the operation in-place. Default: |
+
|---|
Input: \((N, *)\) where * means, any number of additional
+dimensions
Output: \((N, *)\), same shape as the input
+
Applies the element-wise function:
+nn_relu6(inplace = FALSE)+ +
| inplace | +can optionally do the operation in-place. Default: |
+
|---|
$$ + \mbox{ReLU6}(x) = \min(\max(0,x), 6) +$$
+Input: \((N, *)\) where * means, any number of additional
+dimensions
Output: \((N, *)\), same shape as the input
+
Applies a multi-layer Elman RNN with \(\tanh\) or \(\mbox{ReLU}\) non-linearity +to an input sequence.
+nn_rnn( + input_size, + hidden_size, + num_layers = 1, + nonlinearity = NULL, + bias = TRUE, + batch_first = FALSE, + dropout = 0, + bidirectional = FALSE, + ... +)+ +
| input_size | +The number of expected features in the input |
+
|---|---|
| hidden_size | +The number of features in the hidden state |
+
| num_layers | +Number of recurrent layers. E.g., setting |
+
| nonlinearity | +The non-linearity to use. Can be either |
+
| bias | +If |
+
| batch_first | +If |
+
| dropout | +If non-zero, introduces a |
+
| bidirectional | +If |
+
| ... | +other arguments that can be passed to the super class. |
+
For each element in the input sequence, each layer computes the following +function:
+$$ +h_t = \tanh(W_{ih} x_t + b_{ih} + W_{hh} h_{(t-1)} + b_{hh}) +$$
+where \(h_t\) is the hidden state at time t, \(x_t\) is
+the input at time t, and \(h_{(t-1)}\) is the hidden state of the
+previous layer at time t-1 or the initial hidden state at time 0.
+If nonlinearity is 'relu', then \(\mbox{ReLU}\) is used instead of
+\(\tanh\).
input of shape (seq_len, batch, input_size): tensor containing the features
+of the input sequence. The input can also be a packed variable length
+sequence.
h_0 of shape (num_layers * num_directions, batch, hidden_size): tensor
+containing the initial hidden state for each element in the batch.
+Defaults to zero if not provided. If the RNN is bidirectional,
+num_directions should be 2, else it should be 1.
output of shape (seq_len, batch, num_directions * hidden_size): tensor
+containing the output features (h_t) from the last layer of the RNN,
+for each t. If a packed sequence has
+been given as the input, the output will also be a packed sequence.
+For the unpacked case, the directions can be separated
+using output$view(seq_len, batch, num_directions, hidden_size),
+with forward and backward being direction 0 and 1 respectively.
+Similarly, the directions can be separated in the packed case.
h_n of shape (num_layers * num_directions, batch, hidden_size): tensor
+containing the hidden state for t = seq_len.
+Like output, the layers can be separated using
+h_n$view(num_layers, num_directions, batch, hidden_size).
Input1: \((L, N, H_{in})\) tensor containing input features where
+\(H_{in}=\mbox{input\_size}\) and L represents a sequence length.
Input2: \((S, N, H_{out})\) tensor +containing the initial hidden state for each element in the batch. +\(H_{out}=\mbox{hidden\_size}\) +Defaults to zero if not provided. where \(S=\mbox{num\_layers} * \mbox{num\_directions}\) +If the RNN is bidirectional, num_directions should be 2, else it should be 1.
Output1: \((L, N, H_{all})\) where \(H_{all}=\mbox{num\_directions} * \mbox{hidden\_size}\)
Output2: \((S, N, H_{out})\) tensor containing the next hidden state +for each element in the batch
weight_ih_l[k]: the learnable input-hidden weights of the k-th layer,
+of shape (hidden_size, input_size) for k = 0. Otherwise, the shape is
+(hidden_size, num_directions * hidden_size)
weight_hh_l[k]: the learnable hidden-hidden weights of the k-th layer,
+of shape (hidden_size, hidden_size)
bias_ih_l[k]: the learnable input-hidden bias of the k-th layer,
+of shape (hidden_size)
bias_hh_l[k]: the learnable hidden-hidden bias of the k-th layer,
+of shape (hidden_size)
All the weights and biases are initialized from \(\mathcal{U}(-\sqrt{k}, \sqrt{k})\) +where \(k = \frac{1}{\mbox{hidden\_size}}\)
+ ++if (torch_is_installed()) { +rnn <- nn_rnn(10, 20, 2) +input <- torch_randn(5, 3, 10) +h0 <- torch_randn(2, 3, 20) +rnn(input, h0) + +}
Applies the randomized leaky rectified linear unit function, element-wise, +as described in the paper:
+nn_rrelu(lower = 1/8, upper = 1/3, inplace = FALSE)+ +
| lower | +lower bound of the uniform distribution. Default: \(\frac{1}{8}\) |
+
|---|---|
| upper | +upper bound of the uniform distribution. Default: \(\frac{1}{3}\) |
+
| inplace | +can optionally do the operation in-place. Default: |
+
Empirical Evaluation of Rectified Activations in Convolutional Network.
The function is defined as:
+$$ +\mbox{RReLU}(x) = +\left\{ \begin{array}{ll} +x & \mbox{if } x \geq 0 \\ +ax & \mbox{ otherwise } +\end{array} +\right. +$$
+where \(a\) is randomly sampled from uniform distribution +\(\mathcal{U}(\mbox{lower}, \mbox{upper})\). +See: https://arxiv.org/pdf/1505.00853.pdf
+Input: \((N, *)\) where * means, any number of additional
+dimensions
Output: \((N, *)\), same shape as the input
+
Applied element-wise, as:
+nn_selu(inplace = FALSE)+ +
| inplace | +(bool, optional): can optionally do the operation in-place. Default: |
+
|---|
$$ + \mbox{SELU}(x) = \mbox{scale} * (\max(0,x) + \min(0, \alpha * (\exp(x) - 1))) +$$
+with \(\alpha = 1.6732632423543772848170429916717\) and +\(\mbox{scale} = 1.0507009873554804934193349852946\).
+More details can be found in the paper +Self-Normalizing Neural Networks.
+Input: \((N, *)\) where * means, any number of additional
+dimensions
Output: \((N, *)\), same shape as the input
+
A sequential container. +Modules will be added to it in the order they are passed in the constructor. +See examples.
+nn_sequential(..., name = NULL)+ +
| ... | +sequence of modules to be added |
+
|---|---|
| name | +optional name for the generated module. |
+
+if (torch_is_installed()) { + +model <- nn_sequential( + nn_conv2d(1, 20, 5), + nn_relu(), + nn_conv2d(20, 64, 5), + nn_relu() +) +input <- torch_randn(32, 1, 28, 28) +output <- model(input) + +}
Applies the Softmax function to an n-dimensional input Tensor
+rescaling it so that the elements of the n-dimensional output Tensor
+lie in the range [0,1] and sum to 1.
+Softmax is defined as:
nn_softmax(dim)+ +
| dim | +(int): A dimension along which Softmax will be computed (so every slice +along dim will sum to 1). |
+
|---|
+a Tensor of the same dimension and shape as the input with
+values in the range [0, 1]
$$ + \mbox{Softmax}(x_{i}) = \frac{\exp(x_i)}{\sum_j \exp(x_j)} +$$
When the input Tensor is a sparse tensor then the unspecified
+values are treated as -Inf.
This module doesn't work directly with nn_nll_loss(),
+which expects the log to be computed between the softmax and itself.
+Use nn_log_softmax() instead (it's faster and has better numerical properties).
Input: \((*)\) where * means, any number of additional
+dimensions
Output: \((*)\), same shape as the input
+if (torch_is_installed()) { +m <- nn_softmax(1) +input <- torch_randn(2, 3) +output <- m(input) + +}
Applies SoftMax over features to each spatial location.
+When given an image of Channels x Height x Width, it will
+apply Softmax to each location \((Channels, h_i, w_j)\)
nn_softmax2d()
+
+
+ a Tensor of the same dimension and shape as the input with
+values in the range [0, 1]
Input: \((N, C, H, W)\)
Output: \((N, C, H, W)\) (same shape as input)
+if (torch_is_installed()) { +m <- nn_softmax2d() +input <- torch_randn(2, 3, 12, 13) +output <- m(input) + +}
Applies the Softmin function to an n-dimensional input Tensor
+rescaling it so that the elements of the n-dimensional output Tensor
+lie in the range [0, 1] and sum to 1.
+Softmin is defined as:
nn_softmin(dim)+ +
| dim | +(int): A dimension along which Softmin will be computed (so every slice +along dim will sum to 1). |
+
|---|
a Tensor of the same dimension and shape as the input, with
+values in the range [0, 1].
$$ + \mbox{Softmin}(x_{i}) = \frac{\exp(-x_i)}{\sum_j \exp(-x_j)} +$$
+Input: \((*)\) where * means, any number of additional
+dimensions
Output: \((*)\), same shape as the input
+if (torch_is_installed()) { +m <- nn_softmin(dim = 1) +input <- torch_randn(2, 2) +output <- m(input) + +}
Applies the element-wise function: +$$ + \mbox{Softplus}(x) = \frac{1}{\beta} * \log(1 + \exp(\beta * x)) +$$
+nn_softplus(beta = 1, threshold = 20)+ +
| beta | +the \(\beta\) value for the Softplus formulation. Default: 1 |
+
|---|---|
| threshold | +values above this revert to a linear function. Default: 20 |
+
SoftPlus is a smooth approximation to the ReLU function and can be used +to constrain the output of a machine to always be positive. +For numerical stability the implementation reverts to the linear function +when \(input \times \beta > threshold\).
+Input: \((N, *)\) where * means, any number of additional
+dimensions
Output: \((N, *)\), same shape as the input
+
Applies the soft shrinkage function elementwise:
+nn_softshrink(lambd = 0.5)+ +
| lambd | +the \(\lambda\) value (must be no less than zero) for the Softshrink formulation. Default: 0.5 |
+
|---|
$$ + \mbox{SoftShrinkage}(x) = + \left\{ \begin{array}{ll} +x - \lambda, & \mbox{ if } x > \lambda \\ +x + \lambda, & \mbox{ if } x < -\lambda \\ +0, & \mbox{ otherwise } +\end{array} +\right. +$$
+Input: \((N, *)\) where * means, any number of additional
+dimensions
Output: \((N, *)\), same shape as the input
+
Applies the element-wise function:
+nn_tanh()
+
+
+ $$ + \mbox{Tanh}(x) = \tanh(x) = \frac{\exp(x) - \exp(-x)} {\exp(x) + \exp(-x)} +$$
+Input: \((N, *)\) where * means, any number of additional
+dimensions
Output: \((N, *)\), same shape as the input
+
Thresholds each element of the input Tensor.
+nn_threshold(threshold, value, inplace = FALSE)+ +
| threshold | +The value to threshold at |
+
|---|---|
| value | +The value to replace with |
+
| inplace | +can optionally do the operation in-place. Default: |
+
Threshold is defined as: +$$ + y = + \left\{ \begin{array}{ll} + x, &\mbox{ if } x > \mbox{threshold} \\ + \mbox{value}, &\mbox{ otherwise } + \end{array} + \right. +$$
+Input: \((N, *)\) where * means, any number of additional
+dimensions
Output: \((N, *)\), same shape as the input
+if (torch_is_installed()) { +m <- nn_threshold(0.1, 20) +input <- torch_randn(2) +output <- m(input) + +}
R/nn-utils-rnn.R
+ nn_utils_rnn_pack_padded_sequence.Rd
input can be of size T x B x * where T is the length of the
+longest sequence (equal to lengths[1]), B is the batch size, and
+* is any number of dimensions (including 0). If batch_first is
+TRUE, B x T x * input is expected.
nn_utils_rnn_pack_padded_sequence( + input, + lengths, + batch_first = FALSE, + enforce_sorted = TRUE +)+ +
| input | +(Tensor): padded batch of variable length sequences. |
+
|---|---|
| lengths | +(Tensor): list of sequences lengths of each batch element. |
+
| batch_first | +(bool, optional): if |
+
| enforce_sorted | +(bool, optional): if |
+
a PackedSequence object
For unsorted sequences, use enforce_sorted = FALSE. If enforce_sorted is
+TRUE, the sequences should be sorted by length in a decreasing order, i.e.
+input[,1] should be the longest sequence, and input[,B] the shortest
+one. enforce_sorted = TRUE is only necessary for ONNX export.
This function accepts any input that has at least two dimensions. You
+can apply it to pack the labels, and use the output of the RNN with
+them to compute the loss directly. A Tensor can be retrieved from
+a PackedSequence object by accessing its .data attribute.
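A minimal usage sketch, mirroring the example on the nn_utils_rnn_pad_packed_sequence() page:
if (torch_is_installed()) {
  seq <- torch_tensor(rbind(c(1, 2, 0), c(3, 0, 0), c(4, 5, 6)))
  lens <- c(2, 1, 3)
  packed <- nn_utils_rnn_pack_padded_sequence(seq, lens, batch_first = TRUE,
                                              enforce_sorted = FALSE)
  packed
}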
R/nn-utils-rnn.R
+ nn_utils_rnn_pack_sequence.Rd
sequences should be a list of Tensors of size L x *, where L is
+the length of a sequence and * is any number of trailing dimensions,
+including zero.
nn_utils_rnn_pack_sequence(sequences, enforce_sorted = TRUE)+ +
| sequences | +
|
+
|---|---|
| enforce_sorted | +(bool, optional): if |
+
a PackedSequence object
For unsorted sequences, use enforce_sorted = FALSE. If enforce_sorted
+is TRUE, the sequences should be sorted in the order of decreasing length.
+enforce_sorted = TRUE is only necessary for ONNX export.
+if (torch_is_installed()) { +x <- torch_tensor(c(1,2,3), dtype = torch_long()) +y <- torch_tensor(c(4, 5), dtype = torch_long()) +z <- torch_tensor(c(6), dtype = torch_long()) + +p <- nn_utils_rnn_pack_sequence(list(x, y, z)) + +}
R/nn-utils-rnn.R
+ nn_utils_rnn_pad_packed_sequence.Rd
It is an inverse operation to nn_utils_rnn_pack_padded_sequence().
nn_utils_rnn_pad_packed_sequence( + sequence, + batch_first = FALSE, + padding_value = 0, + total_length = NULL +)+ +
| sequence | +(PackedSequence): batch to pad |
+
|---|---|
| batch_first | +(bool, optional): if |
+
| padding_value | +(float, optional): values for padded elements. |
+
| total_length | +(int, optional): if not |
+
Tuple of Tensor containing the padded sequence, and a Tensor
+containing the list of lengths of each sequence in the batch.
+Batch elements will be re-ordered as they were ordered originally when
+the batch was passed to nn_utils_rnn_pack_padded_sequence() or
+nn_utils_rnn_pack_sequence().
The returned Tensor's data will be of size T x B x *, where T is the length
+of the longest sequence and B is the batch size. If batch_first is TRUE,
+the data will be transposed into B x T x * format.
total_length is useful to implement the
+pack sequence -> recurrent network -> unpack sequence pattern in a
+nn_module wrapped in DataParallel.
+if (torch_is_installed()) { +seq <- torch_tensor(rbind(c(1,2,0), c(3,0,0), c(4,5,6))) +lens <- c(2,1,3) +packed <- nn_utils_rnn_pack_padded_sequence(seq, lens, batch_first = TRUE, + enforce_sorted = FALSE) +packed +nn_utils_rnn_pad_packed_sequence(packed, batch_first=TRUE) + +}
R/nn-utils-rnn.R
+ nn_utils_rnn_pad_sequence.Rd
pad_sequence stacks a list of Tensors along a new dimension,
+and pads them to equal length. For example, if the input is a list of
+sequences with size L x *, the output is of size T x B x * if batch_first
+is FALSE, and B x T x * otherwise.
nn_utils_rnn_pad_sequence(sequences, batch_first = FALSE, padding_value = 0)+ +
| sequences | +
|
+
|---|---|
| batch_first | +(bool, optional): output will be in |
+
| padding_value | +(float, optional): value for padded elements. Default: 0. |
+
Tensor of size T x B x * if batch_first is FALSE.
+Tensor of size B x T x * otherwise
B is batch size. It is equal to the number of elements in sequences.
+T is length of the longest sequence.
+L is length of the sequence.
+* is any number of trailing dimensions, including none.
This function returns a Tensor of size T x B x * or B x T x *
+where T is the length of the longest sequence. This function assumes
+trailing dimensions and type of all the Tensors in sequences are same.
+if (torch_is_installed()) { +a <- torch_ones(25, 300) +b <- torch_ones(22, 300) +c <- torch_ones(15, 300) +nn_utils_rnn_pad_sequence(list(a, b, c))$size() + +}
Applies a 1D adaptive average pooling over an input signal composed of +several input planes.
+nnf_adaptive_avg_pool1d(input, output_size)+ +
| input | +input tensor of shape (minibatch , in_channels , iW) |
+
|---|---|
| output_size | +the target output size (single integer) |
+
Applies a 2D adaptive average pooling over an input signal composed of +several input planes.
+nnf_adaptive_avg_pool2d(input, output_size)+ +
| input | +input tensor (minibatch, in_channels , iH , iW) |
+
|---|---|
| output_size | +the target output size (single integer or double-integer tuple) |
+
Applies a 3D adaptive average pooling over an input signal composed of +several input planes.
+nnf_adaptive_avg_pool3d(input, output_size)+ +
| input | +input tensor (minibatch, in_channels, iT, iH, iW) |
+
|---|---|
| output_size | +the target output size (single integer or triple-integer tuple) |
+
Applies a 1D adaptive max pooling over an input signal composed of +several input planes.
+nnf_adaptive_max_pool1d(input, output_size, return_indices = FALSE)+ +
| input | +input tensor of shape (minibatch , in_channels , iW) |
+
|---|---|
| output_size | +the target output size (single integer) |
+
| return_indices | +whether to return pooling indices. Default: |
+
Applies a 2D adaptive max pooling over an input signal composed of +several input planes.
+nnf_adaptive_max_pool2d(input, output_size, return_indices = FALSE)+ +
| input | +input tensor (minibatch, in_channels , iH , iW) |
+
|---|---|
| output_size | +the target output size (single integer or double-integer tuple) |
+
| return_indices | +whether to return pooling indices. Default: |
+
Applies a 3D adaptive max pooling over an input signal composed of +several input planes.
+nnf_adaptive_max_pool3d(input, output_size, return_indices = FALSE)+ +
| input | +input tensor (minibatch, in_channels, iT, iH, iW) |
+
|---|---|
| output_size | +the target output size (single integer or triple-integer tuple) |
+
| return_indices | +whether to return pooling indices. Default: |
+
Generates a 2D or 3D flow field (sampling grid), given a batch of
+affine matrices theta.
nnf_affine_grid(theta, size, align_corners = FALSE)+ +
| theta | +(Tensor) input batch of affine matrices with shape +(\(N \times 2 \times 3\)) for 2D or (\(N \times 3 \times 4\)) for 3D |
+
|---|---|
| size | +(torch.Size) the target output image size. (\(N \times C \times H \times W\) +for 2D or \(N \times C \times D \times H \times W\) for 3D) +Example: torch.Size((32, 3, 24, 24)) |
+
| align_corners | +(bool, optional) if |
+
This function is often used in conjunction with nnf_grid_sample()
+to build Spatial Transformer Networks.
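A minimal sketch of pairing nnf_affine_grid() with grid sampling, assuming nnf_grid_sample() is available; an identity affine matrix reproduces the input (up to interpolation).
if (torch_is_installed()) {
  # identity affine transform for a single 1-channel 8x8 image
  theta <- torch_tensor(rbind(c(1, 0, 0), c(0, 1, 0)))$unsqueeze(1)  # (1, 2, 3)
  grid <- nnf_affine_grid(theta, c(1, 1, 8, 8))
  img <- torch_randn(1, 1, 8, 8)
  out <- nnf_grid_sample(img, grid, align_corners = FALSE)  # approximately img
}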
Applies alpha dropout to the input.
+nnf_alpha_dropout(input, p = 0.5, training = FALSE, inplace = FALSE)+ +
| input | +the input tensor |
+
|---|---|
| p | +probability of an element to be zeroed. Default: 0.5 |
+
| training | +apply dropout if TRUE. Default: FALSE |
+
| inplace | +If set to |
+
Applies a 1D average pooling over an input signal composed of several +input planes.
+nnf_avg_pool1d( + input, + kernel_size, + stride = NULL, + padding = 0, + ceil_mode = FALSE, + count_include_pad = TRUE +)+ +
| input | +input tensor of shape (minibatch , in_channels , iW) |
+
|---|---|
| kernel_size | +the size of the window. Can be a single number or a
+tuple |
+
| stride | +the stride of the window. Can be a single number or a tuple
+ |
+
| padding | +implicit zero paddings on both sides of the input. Can be a
+single number or a tuple |
+
| ceil_mode | +when True, will use |
+
| count_include_pad | +when True, will include the zero-padding in the
+averaging calculation. Default: |
+
Applies 2D average-pooling operation in \(kH * kW\) regions by step size +\(sH * sW\) steps. The number of output features is equal to the number of +input planes.
+nnf_avg_pool2d( + input, + kernel_size, + stride = NULL, + padding = 0, + ceil_mode = FALSE, + count_include_pad = TRUE, + divisor_override = NULL +)+ +
| input | +input tensor (minibatch, in_channels , iH , iW) |
+
|---|---|
| kernel_size | +size of the pooling region. Can be a single number or a
+tuple |
+
| stride | +stride of the pooling operation. Can be a single number or a
+tuple |
+
| padding | +implicit zero paddings on both sides of the input. Can be a
+single number or a tuple |
+
| ceil_mode | +when True, will use |
+
| count_include_pad | +when True, will include the zero-padding in the
+averaging calculation. Default: |
+
| divisor_override | +if specified, it will be used as divisor, otherwise
+size of the pooling region will be used. Default: |
+
Applies 3D average-pooling operation in \(kT * kH * kW\) regions by step +size \(sT * sH * sW\) steps. The number of output features is equal to +\(\lfloor \frac{ \mbox{input planes} }{sT} \rfloor\).
+nnf_avg_pool3d( + input, + kernel_size, + stride = NULL, + padding = 0, + ceil_mode = FALSE, + count_include_pad = TRUE, + divisor_override = NULL +)+ +
| input | +input tensor (minibatch, in_channels, iT, iH, iW) |
+
|---|---|
| kernel_size | +size of the pooling region. Can be a single number or a
+tuple |
+
| stride | +stride of the pooling operation. Can be a single number or a
+tuple |
+
| padding | +implicit zero paddings on both sides of the input. Can be a
+single number or a tuple |
+
| ceil_mode | +when True, will use |
+
| count_include_pad | +when True, will include the zero-padding in the +averaging calculation |
+
| divisor_override | +if specified, it will be used as the divisor, otherwise +the size of the pooling region will be used. Default: NULL |
+size of the pooling region will be used. Default: |
+
Applies Batch Normalization for each channel across a batch of data.
+nnf_batch_norm( + input, + running_mean, + running_var, + weight = NULL, + bias = NULL, + training = FALSE, + momentum = 0.1, + eps = 1e-05 +)+ +
| input | +input tensor |
+
|---|---|
| running_mean | +the running_mean tensor |
+
| running_var | +the running_var tensor |
+
| weight | +the weight tensor |
+
| bias | +the bias tensor |
+
| training | +bool indicating whether it's in training mode. Default: FALSE |
+
| momentum | +the value used for the |
+
| eps | +a value added to the denominator for numerical stability. Default: 1e-5 |
+
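A minimal usage sketch (the running statistics here are illustrative; in training mode they are updated in place):
if (torch_is_installed()) {
  input <- torch_randn(4, 3, 8, 8)
  running_mean <- torch_zeros(3)
  running_var <- torch_ones(3)
  out <- nnf_batch_norm(input, running_mean, running_var, training = TRUE)
}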
Applies a bilinear transformation to the incoming data: +\(y = x_1 A x_2 + b\)
+nnf_bilinear(input1, input2, weight, bias = NULL)+ +
| input1 | +\((N, *, H_{in1})\) where \(H_{in1}=\mbox{in1\_features}\) +and \(*\) means any number of additional dimensions. +All but the last dimension of the inputs should be the same. |
+
|---|---|
| input2 | +\((N, *, H_{in2})\) where \(H_{in2}=\mbox{in2\_features}\) |
+
| weight | +\((\mbox{out\_features}, \mbox{in1\_features}, +\mbox{in2\_features})\) |
+
| bias | +\((\mbox{out\_features})\) |
+
output \((N, *, H_{out})\) where \(H_{out}=\mbox{out\_features}\) +and all but the last dimension are the same shape as the input.
+ +Function that measures the Binary Cross Entropy +between the target and the output.
+nnf_binary_cross_entropy( + input, + target, + weight = NULL, + reduction = c("mean", "sum", "none") +)+ +
| input | +tensor (N,*) where * means any number of additional dimensions |
+
|---|---|
| target | +tensor (N,*) , same shape as the input |
+
| weight | +(tensor) weight for each value. |
+
| reduction | +(string, optional) – Specifies the reduction to apply to the +output: 'none' | 'mean' | 'sum'. 'none': no reduction will be applied, 'mean': +the sum of the output will be divided by the number of elements in the output, +'sum': the output will be summed. Default: 'mean' |
+
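A minimal usage sketch: the input must hold probabilities in (0, 1), e.g. the output of a sigmoid.
if (torch_is_installed()) {
  input <- torch_sigmoid(torch_randn(3))
  target <- torch_tensor(c(1, 0, 1))
  nnf_binary_cross_entropy(input, target)
}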
R/nnf-loss.R
+ nnf_binary_cross_entropy_with_logits.Rd
Function that measures Binary Cross Entropy between target and output +logits.
+nnf_binary_cross_entropy_with_logits( + input, + target, + weight = NULL, + reduction = c("mean", "sum", "none"), + pos_weight = NULL +)+ +
| input | +Tensor of arbitrary shape |
+
|---|---|
| target | +Tensor of the same shape as input |
+
| weight | +(Tensor, optional) a manual rescaling weight if provided it's +repeated to match input tensor shape. |
+
| reduction | +(string, optional) – Specifies the reduction to apply to the +output: 'none' | 'mean' | 'sum'. 'none': no reduction will be applied, 'mean': +the sum of the output will be divided by the number of elements in the output, +'sum': the output will be summed. Default: 'mean' |
+
| pos_weight | +(Tensor, optional) a weight of positive examples. +Must be a vector with length equal to the number of classes. |
+
Applies element-wise, \(\mbox{CELU}(x) = \max(0,x) + \min(0, \alpha * (\exp(x / \alpha) - 1))\).
+nnf_celu(input, alpha = 1, inplace = FALSE) + +nnf_celu_(input, alpha = 1)+ +
| input | +(N,*) tensor, where * means, any number of additional +dimensions |
+
|---|---|
| alpha | +the alpha value for the CELU formulation. Default: 1.0 |
+
| inplace | +can optionally do the operation in-place. Default: FALSE |
+
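A minimal usage sketch:
if (torch_is_installed()) {
  x <- torch_randn(2, 3)
  nnf_celu(x, alpha = 1)  # negative values saturate towards -alpha
}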
Applies a 1D convolution over an input signal composed of several input +planes.
+nnf_conv1d( + input, + weight, + bias = NULL, + stride = 1, + padding = 0, + dilation = 1, + groups = 1 +)+ +
| input | +input tensor of shape (minibatch, in_channels , iW) |
+
|---|---|
| weight | +filters of shape (out_channels, in_channels/groups , kW) |
+
| bias | +optional bias of shape (out_channels). Default: |
+
| stride | +the stride of the convolving kernel. Can be a single number or
+a one-element tuple |
+
| padding | +implicit paddings on both sides of the input. Can be a
+single number or a one-element tuple |
+
| dilation | +the spacing between kernel elements. Can be a single number or
+a one-element tuple |
+
| groups | +split input into groups, |
+
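A minimal usage sketch with the shapes from the argument table, using an explicit weight tensor rather than a module:
if (torch_is_installed()) {
  input <- torch_randn(1, 4, 10)   # (minibatch, in_channels, iW)
  weight <- torch_randn(8, 4, 3)   # (out_channels, in_channels/groups, kW)
  out <- nnf_conv1d(input, weight, padding = 1)
  out$size()  # 1 8 10
}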
Applies a 2D convolution over an input image composed of several input +planes.
+nnf_conv2d( + input, + weight, + bias = NULL, + stride = 1, + padding = 0, + dilation = 1, + groups = 1 +)+ +
| input | +input tensor of shape (minibatch, in_channels, iH , iW) |
+
|---|---|
| weight | +filters of shape (out_channels , in_channels/groups, kH , kW) |
+
| bias | +optional bias tensor of shape (out_channels). Default: |
+
| stride | +the stride of the convolving kernel. Can be a single number or a
+tuple |
+
| padding | +implicit paddings on both sides of the input. Can be a
+single number or a tuple |
+
| dilation | +the spacing between kernel elements. Can be a single number or
+a tuple |
+
| groups | +split input into groups, |
+
Applies a 3D convolution over an input image composed of several input +planes.
+nnf_conv3d( + input, + weight, + bias = NULL, + stride = 1, + padding = 0, + dilation = 1, + groups = 1 +)+ +
| input | +input tensor of shape (minibatch, in_channels , iT , iH , iW) |
+
|---|---|
| weight | +filters of shape (out_channels , in_channels/groups, kT , kH , kW) |
+
| bias | +optional bias tensor of shape (out_channels). Default: |
+
| stride | +the stride of the convolving kernel. Can be a single number or a
+tuple |
+
| padding | +implicit paddings on both sides of the input. Can be a
+single number or a tuple |
+
| dilation | +the spacing between kernel elements. Can be a single number or
+a tuple |
+
| groups | +split input into groups, |
+
Applies a 1-dimensional sequence convolution over an input sequence. +Input and output dimensions are (Time, Batch, Channels) - hence TBC.
+nnf_conv_tbc(input, weight, bias, pad = 0)+ +
| input | +input tensor of shape \((\mbox{sequence length} \times +batch \times \mbox{in\_channels})\) |
+
|---|---|
| weight | +filter of shape (\(\mbox{kernel width} \times \mbox{in\_channels} +\times \mbox{out\_channels}\)) |
+
| bias | +bias of shape (\(\mbox{out\_channels}\)) |
+
| pad | +number of timesteps to pad. Default: 0 |
+
Applies a 1D transposed convolution operator over an input signal +composed of several input planes, sometimes also called "deconvolution".
+nnf_conv_transpose1d( + input, + weight, + bias = NULL, + stride = 1, + padding = 0, + output_padding = 0, + groups = 1, + dilation = 1 +)+ +
| input | +input tensor of shape (minibatch, in_channels , iW) |
+
|---|---|
| weight | filters of shape (in_channels, out_channels/groups, kW) |
+
| bias | optional bias of shape (out_channels). Default: NULL |
+
| stride | +the stride of the convolving kernel. Can be a single number or
+a one-element tuple |
+
| padding | +implicit paddings on both sides of the input. Can be a
+single number or a one-element tuple |
+
| output_padding | +padding applied to the output |
+
| groups | split input into groups; in_channels should be divisible by the number of groups. Default: 1 |
+
| dilation | +the spacing between kernel elements. Can be a single number or
+a one-element tuple |
+
Applies a 2D transposed convolution operator over an input image +composed of several input planes, sometimes also called "deconvolution".
+nnf_conv_transpose2d( + input, + weight, + bias = NULL, + stride = 1, + padding = 0, + output_padding = 0, + groups = 1, + dilation = 1 +)+ +
| input | +input tensor of shape (minibatch, in_channels, iH , iW) |
+
|---|---|
| weight | filters of shape (in_channels, out_channels/groups, kH, kW) |
+
| bias | optional bias tensor of shape (out_channels). Default: NULL |
+
| stride | +the stride of the convolving kernel. Can be a single number or a
+tuple |
+
| padding | +implicit paddings on both sides of the input. Can be a
+single number or a tuple |
+
| output_padding | +padding applied to the output |
+
| groups | split input into groups; in_channels should be divisible by the number of groups. Default: 1 |
+
| dilation | +the spacing between kernel elements. Can be a single number or
+a tuple |
+
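A sketch with made-up shapes; note that for the transposed variants the filter layout puts in_channels first:

if (torch_is_installed()) {
  input  <- torch_randn(1, 16, 8, 8)  # 16 channels, 8 x 8 spatial size
  weight <- torch_randn(16, 8, 2, 2)  # (in_channels, out_channels/groups, kH, kW)
  out <- nnf_conv_transpose2d(input, weight, stride = 2)
  out$shape                           # 1 x 8 x 16 x 16: the spatial size is doubled
}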
Applies a 3D transposed convolution operator over an input image +composed of several input planes, sometimes also called "deconvolution"
+nnf_conv_transpose3d( + input, + weight, + bias = NULL, + stride = 1, + padding = 0, + output_padding = 0, + groups = 1, + dilation = 1 +)+ +
| input | +input tensor of shape (minibatch, in_channels , iT , iH , iW) |
+
|---|---|
| weight | filters of shape (in_channels, out_channels/groups, kT, kH, kW) |
+
| bias | optional bias tensor of shape (out_channels). Default: NULL |
+
| stride | +the stride of the convolving kernel. Can be a single number or a
+tuple |
+
| padding | +implicit paddings on both sides of the input. Can be a
+single number or a tuple |
+
| output_padding | +padding applied to the output |
+
| groups | split input into groups; in_channels should be divisible by the number of groups. Default: 1 |
+
| dilation | +the spacing between kernel elements. Can be a single number or
+a tuple |
+
Creates a criterion that measures the loss given input tensors x_1, x_2 and a +Tensor label y with values 1 or -1. This is used for measuring whether two inputs +are similar or dissimilar, using the cosine distance, and is typically used +for learning nonlinear embeddings or semi-supervised learning.
+nnf_cosine_embedding_loss( + input1, + input2, + target, + margin = 0, + reduction = c("mean", "sum", "none") +)+ +
| input1 | +the input x_1 tensor |
+
|---|---|
| input2 | +the input x_2 tensor |
+
| target | +the target tensor |
+
| margin | Should be a number from -1 to 1; 0 to 0.5 is suggested. If margin is missing, the default value is 0. |
+
| reduction | +(string, optional) – Specifies the reduction to apply to the +output: 'none' | 'mean' | 'sum'. 'none': no reduction will be applied, 'mean': +the sum of the output will be divided by the number of elements in the output, +'sum': the output will be summed. Default: 'mean' |
+
Returns cosine similarity between x1 and x2, computed along dim.
+nnf_cosine_similarity(x1, x2, dim = 1, eps = 1e-08)+ +
| x1 | +(Tensor) First input. |
+
|---|---|
| x2 | +(Tensor) Second input (of size matching x1). |
+
| dim | +(int, optional) Dimension of vectors. Default: 1 |
+
| eps | +(float, optional) Small value to avoid division by zero. +Default: 1e-8 |
+
$$ + \mbox{similarity} = \frac{x_1 \cdot x_2}{\max(\Vert x_1 \Vert _2 \cdot \Vert x_2 \Vert _2, \epsilon)} +$$
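An illustrative call (dimension indices are 1-based in the R API, so dim = 2 computes one similarity per row of a matrix):

if (torch_is_installed()) {
  x1 <- torch_randn(5, 10)
  x2 <- torch_randn(5, 10)
  nnf_cosine_similarity(x1, x2, dim = 2)  # 5 values, one per row
}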
+ +This criterion combines log_softmax and nll_loss in a single
+function.
nnf_cross_entropy( + input, + target, + weight = NULL, + ignore_index = -100, + reduction = c("mean", "sum", "none") +)+ +
| input | (Tensor) \((N, C)\) where C = number of classes. |
+
|---|---|
| target | +(Tensor) \((N)\) where each value is \(0 \leq \mbox{targets}[i] \leq C-1\), +or \((N, d_1, d_2, ..., d_K)\) where \(K \geq 1\) for K-dimensional loss. |
+
| weight | +(Tensor, optional) a manual rescaling weight given to each class. If
+given, has to be a Tensor of size |
+
| ignore_index | +(int, optional) Specifies a target value that is ignored +and does not contribute to the input gradient. |
+
| reduction | +(string, optional) – Specifies the reduction to apply to the +output: 'none' | 'mean' | 'sum'. 'none': no reduction will be applied, 'mean': +the sum of the output will be divided by the number of elements in the output, +'sum': the output will be summed. Default: 'mean' |
+
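A sketch of the equivalence with log_softmax followed by nll_loss (class labels are written 1-based here, as elsewhere in the R API):

if (torch_is_installed()) {
  input  <- torch_randn(3, 5)                               # 3 observations, 5 classes
  target <- torch_tensor(c(1, 2, 3), dtype = torch_long())  # one class label per observation
  loss1 <- nnf_cross_entropy(input, target)
  loss2 <- nnf_nll_loss(nnf_log_softmax(input, dim = 2), target)
  torch_allclose(loss1, loss2)
}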
The Connectionist Temporal Classification loss.
+nnf_ctc_loss( + log_probs, + targets, + input_lengths, + target_lengths, + blank = 0, + reduction = c("mean", "sum", "none"), + zero_infinity = FALSE +)+ +
| log_probs | +\((T, N, C)\) where C = number of characters in alphabet including blank, +T = input length, and N = batch size. The logarithmized probabilities of +the outputs (e.g. obtained with nnf_log_softmax). |
+
|---|---|
| targets | \((N, S)\) or \((\mbox{sum}(\mbox{target\_lengths}))\). Targets cannot be blank. |
+
| input_lengths | +\((N)\). Lengths of the inputs (must each be \(\leq T\)) |
+
| target_lengths | +\((N)\). Lengths of the targets |
+
| blank | +(int, optional) Blank label. Default \(0\). |
+
| reduction | +(string, optional) – Specifies the reduction to apply to the +output: 'none' | 'mean' | 'sum'. 'none': no reduction will be applied, 'mean': +the sum of the output will be divided by the number of elements in the output, +'sum': the output will be summed. Default: 'mean' |
+
| zero_infinity | +(bool, optional) Whether to zero infinite losses and the
+associated gradients. Default: |
+
During training, randomly zeroes some of the elements of the input
+tensor with probability p using samples from a Bernoulli
+distribution.
nnf_dropout(input, p = 0.5, training = TRUE, inplace = FALSE)+ +
| input | +the input tensor |
+
|---|---|
| p | +probability of an element to be zeroed. Default: 0.5 |
+
| training | apply dropout if TRUE. Default: TRUE |
+
| inplace | If set to TRUE, will do this operation in-place. Default: FALSE |
+
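An illustrative sketch of the training versus evaluation behaviour:

if (torch_is_installed()) {
  x <- torch_ones(2, 5)
  nnf_dropout(x, p = 0.5, training = TRUE)   # roughly half the entries zeroed, the rest rescaled by 1/(1-p)
  nnf_dropout(x, p = 0.5, training = FALSE)  # returned unchanged at evaluation time
}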
Randomly zero out entire channels (a channel is a 2D feature map,
+e.g., the \(j\)-th channel of the \(i\)-th sample in the
batched input is a 2D tensor \(input[i, j]\)) of the input tensor.
+Each channel will be zeroed out independently on every forward call with
+probability p using samples from a Bernoulli distribution.
nnf_dropout2d(input, p = 0.5, training = TRUE, inplace = FALSE)+ +
| input | +the input tensor |
+
|---|---|
| p | +probability of a channel to be zeroed. Default: 0.5 |
+
| training | apply dropout if TRUE. Default: TRUE |
+
| inplace | If set to TRUE, will do this operation in-place. Default: FALSE |
+
Randomly zero out entire channels (a channel is a 3D feature map,
+e.g., the \(j\)-th channel of the \(i\)-th sample in the
batched input is a 3D tensor \(input[i, j]\)) of the input tensor.
+Each channel will be zeroed out independently on every forward call with
+probability p using samples from a Bernoulli distribution.
nnf_dropout3d(input, p = 0.5, training = TRUE, inplace = FALSE)+ +
| input | +the input tensor |
+
|---|---|
| p | +probability of a channel to be zeroed. Default: 0.5 |
+
| training | apply dropout if TRUE. Default: TRUE |
+
| inplace | If set to TRUE, will do this operation in-place. Default: FALSE |
+
Applies element-wise, +$$ELU(x) = max(0,x) + min(0, \alpha * (exp(x) - 1))$$.
+nnf_elu(input, alpha = 1, inplace = FALSE) + +nnf_elu_(input, alpha = 1)+ +
| input | +(N,*) tensor, where * means, any number of additional +dimensions |
+
|---|---|
| alpha | +the alpha value for the ELU formulation. Default: 1.0 |
+
| inplace | +can optionally do the operation in-place. Default: FALSE |
+
+if (torch_is_installed()) { +x <- torch_randn(2, 2) +y <- nnf_elu(x, alpha = 1) +nnf_elu_(x, alpha = 1) +torch_equal(x, y) + +}
A simple lookup table that looks up embeddings in a fixed dictionary and size.
+nnf_embedding( + input, + weight, + padding_idx = NULL, + max_norm = NULL, + norm_type = 2, + scale_grad_by_freq = FALSE, + sparse = FALSE +)+ +
| input | +(LongTensor) Tensor containing indices into the embedding matrix |
+
|---|---|
| weight | +(Tensor) The embedding matrix with number of rows equal to the +maximum possible index + 1, and number of columns equal to the embedding size |
+
| padding_idx | +(int, optional) If given, pads the output with the embedding
+vector at |
+
| max_norm | +(float, optional) If given, each embedding vector with norm larger
+than |
+
| norm_type | (float, optional) The p of the p-norm to compute for the max_norm option. Default: 2 |
+
| scale_grad_by_freq | (boolean, optional) If given, this will scale gradients by the inverse of frequency of the words in the mini-batch. Default: FALSE |
+
| sparse | (bool, optional) If TRUE, gradient w.r.t. weight will be a sparse tensor. |
+
This module is often used to retrieve word embeddings using indices. +The input to the module is a list of indices, and the embedding matrix, +and the output is the corresponding word embeddings.
+ +Computes sums, means or maxes of bags of embeddings, without instantiating the
+intermediate embeddings.
nnf_embedding_bag( + input, + weight, + offsets = NULL, + max_norm = NULL, + norm_type = 2, + scale_grad_by_freq = FALSE, + mode = "mean", + sparse = FALSE, + per_sample_weights = NULL, + include_last_offset = FALSE +)+ +
| input | +(LongTensor) Tensor containing bags of indices into the embedding matrix |
+
|---|---|
| weight | +(Tensor) The embedding matrix with number of rows equal to the +maximum possible index + 1, and number of columns equal to the embedding size |
+
| offsets | +(LongTensor, optional) Only used when |
+
| max_norm | +(float, optional) If given, each embedding vector with norm
+larger than |
+
| norm_type | +(float, optional) The |
+
| scale_grad_by_freq | +(boolean, optional) if given, this will scale gradients
+by the inverse of frequency of the words in the mini-batch. Default |
+
| mode | +(string, optional) |
+
| sparse | +(bool, optional) if |
+
| per_sample_weights | +(Tensor, optional) a tensor of float / double weights,
+or NULL to indicate all weights should be taken to be 1. If specified,
+ |
+
| include_last_offset | +(bool, optional) if |
+
Combines an array of sliding local blocks into a large containing +tensor.
+nnf_fold( + input, + output_size, + kernel_size, + dilation = 1, + padding = 0, + stride = 1 +)+ +
| input | +the input tensor |
+
|---|---|
| output_size | +the shape of the spatial dimensions of the output (i.e.,
+ |
+
| kernel_size | +the size of the sliding blocks |
+
| dilation | +a parameter that controls the stride of elements within the +neighborhood. Default: 1 |
+
| padding | +implicit zero padding to be added on both sides of input. +Default: 0 |
+
| stride | +the stride of the sliding blocks in the input spatial dimensions. +Default: 1 |
+
Currently, only 4-D output tensors (batched image-like tensors) are +supported.
+ +Applies 2D fractional max pooling over an input signal composed of several input planes.
+nnf_fractional_max_pool2d( + input, + kernel_size, + output_size = NULL, + output_ratio = NULL, + return_indices = FALSE, + random_samples = NULL +)+ +
| input | +the input tensor |
+
|---|---|
| kernel_size | +the size of the window to take a max over. Can be a
+single number \(k\) (for a square kernel of \(k * k\)) or
+a tuple |
+
| output_size | +the target output size of the image of the form \(oH * oW\).
+Can be a tuple |
+
| output_ratio | +If one wants to have an output size as a ratio of the input size, +this option can be given. This has to be a number or tuple in the range (0, 1) |
+
| return_indices | +if |
+
| random_samples | +optional random samples. |
+
Fractional MaxPooling is described in detail in the paper Fractional MaxPooling by Ben Graham
The max-pooling operation is applied in \(kH * kW\) regions by a stochastic +step size determined by the target output size. +The number of output features is equal to the number of input planes.
+ +Applies 3D fractional max pooling over an input signal composed of several input planes.
+nnf_fractional_max_pool3d( + input, + kernel_size, + output_size = NULL, + output_ratio = NULL, + return_indices = FALSE, + random_samples = NULL +)+ +
| input | +the input tensor |
+
|---|---|
| kernel_size | +the size of the window to take a max over. Can be a single number \(k\)
+(for a square kernel of \(k * k * k\)) or a tuple |
+
| output_size | +the target output size of the form \(oT * oH * oW\).
+Can be a tuple |
+
| output_ratio | +If one wants to have an output size as a ratio of the +input size, this option can be given. This has to be a number or tuple in the +range (0, 1) |
+
| return_indices | +if |
+
| random_samples | +undocumented argument. |
+
Fractional MaxPooling is described in detail in the paper Fractional MaxPooling by Ben Graham
The max-pooling operation is applied in \(kT * kH * kW\) regions by a stochastic +step size determined by the target output size. +The number of output features is equal to the number of input planes.
+ +Gelu
+nnf_gelu(input)+ +
| input | +(N,*) tensor, where * means, any number of additional +dimensions |
+
|---|
Applies element-wise the function +\(GELU(x) = x * \Phi(x)\)
+where \(\Phi(x)\) is the Cumulative Distribution Function for +Gaussian Distribution.
+ + +The gated linear unit. Computes:
+nnf_glu(input, dim = -1)+ +
| input | +(Tensor) input tensor |
+
|---|---|
| dim | +(int) dimension on which to split the input. Default: -1 |
+
$$GLU(a, b) = a \otimes \sigma(b)$$
+where input is split in half along dim to form a and b, \(\sigma\)
+is the sigmoid function and \(\otimes\) is the element-wise product
+between matrices.
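An illustrative sketch (the input shape is arbitrary):

if (torch_is_installed()) {
  x <- torch_randn(4, 6)
  out <- nnf_glu(x, dim = -1)
  out$shape  # 4 x 3: the chosen dimension is halved
}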
Given an input and a flow-field grid, computes the
+output using input values and pixel locations from grid.
nnf_grid_sample( + input, + grid, + mode = c("bilinear", "nearest"), + padding_mode = c("zeros", "border", "reflection"), + align_corners = FALSE +)+ +
| input | +(Tensor) input of shape \((N, C, H_{\mbox{in}}, W_{\mbox{in}})\) (4-D case) or \((N, C, D_{\mbox{in}}, H_{\mbox{in}}, W_{\mbox{in}})\) (5-D case) |
+
|---|---|
| grid | +(Tensor) flow-field of shape \((N, H_{\mbox{out}}, W_{\mbox{out}}, 2)\) (4-D case) or \((N, D_{\mbox{out}}, H_{\mbox{out}}, W_{\mbox{out}}, 3)\) (5-D case) |
+
| mode | +(str) interpolation mode to calculate output values |
+
| padding_mode | +(str) padding mode for outside grid values |
+
| align_corners | +(bool, optional) Geometrically, we consider the pixels of the
+input as squares rather than points. If set to |
+
Currently, only spatial (4-D) and volumetric (5-D) input are
+supported.
In the spatial (4-D) case, for input with shape
+\((N, C, H_{\mbox{in}}, W_{\mbox{in}})\) and grid with shape
+\((N, H_{\mbox{out}}, W_{\mbox{out}}, 2)\), the output will have shape
+\((N, C, H_{\mbox{out}}, W_{\mbox{out}})\).
For each output location output[n, :, h, w], the size-2 vector
+grid[n, h, w] specifies input pixel locations x and y,
+which are used to interpolate the output value output[n, :, h, w].
+In the case of 5D inputs, grid[n, d, h, w] specifies the
+x, y, z pixel locations for interpolating
+output[n, :, d, h, w]. mode argument specifies nearest or
+bilinear interpolation method to sample the input pixels.
grid specifies the sampling pixel locations normalized by the
+input spatial dimensions. Therefore, it should have most values in
+the range of [-1, 1]. For example, values x = -1, y = -1 is the
+left-top pixel of input, and values x = 1, y = 1 is the
+right-bottom pixel of input.
If grid has values outside the range of [-1, 1], the corresponding
+outputs are handled as defined by padding_mode. Options are
padding_mode="zeros": use 0 for out-of-bound grid locations,
padding_mode="border": use border values for out-of-bound grid locations,
padding_mode="reflection": use values at locations reflected by the border for out-of-bound grid locations. Locations far from the border keep being reflected until they fall in bounds; e.g., the (normalized) pixel location x = -3.5 is reflected by border -1 to x' = 1.5, then reflected by border 1 to x'' = -0.5.
This function is often used in conjunction with nnf_affine_grid()
to build Spatial Transformer Networks.
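A sketch using an identity affine transform, which yields a sampling grid that reproduces the input (the shapes are illustrative):

if (torch_is_installed()) {
  input <- torch_randn(1, 1, 4, 4)
  theta <- torch_tensor(matrix(c(1, 0, 0,
                                 0, 1, 0), nrow = 2, byrow = TRUE))$unsqueeze(1)
  grid <- nnf_affine_grid(theta, size = c(1, 1, 4, 4), align_corners = TRUE)
  out <- nnf_grid_sample(input, grid, align_corners = TRUE)
  torch_allclose(input, out, atol = 1e-5)
}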
Applies Group Normalization for last certain number of dimensions.
+nnf_group_norm(input, num_groups, weight = NULL, bias = NULL, eps = 1e-05)+ +
| input | +the input tensor |
+
|---|---|
| num_groups | +number of groups to separate the channels into |
+
| weight | +the weight tensor |
+
| bias | +the bias tensor |
+
| eps | +a value added to the denominator for numerical stability. Default: 1e-5 |
+
Samples from the Gumbel-Softmax distribution and +optionally discretizes.
+nnf_gumbel_softmax(logits, tau = 1, hard = FALSE, dim = -1)+ +
| logits | [..., num_features] unnormalized log probabilities |
+
|---|---|
| tau | +non-negative scalar temperature |
+
| hard | +if |
+
| dim | +(int) A dimension along which softmax will be computed. Default: -1. |
+
Applies the element-wise function \(\mbox{Hardsigmoid}(x) = \frac{ReLU6(x + 3)}{6}\)
+nnf_hardsigmoid(input, inplace = FALSE)+ +
| input | +(N,*) tensor, where * means, any number of additional +dimensions |
+
|---|---|
| inplace | If set to TRUE, will do this operation in-place. Default: FALSE |
+
Applies the hardswish function, element-wise, as described in the paper: +Searching for MobileNetV3.
+nnf_hardswish(input, inplace = FALSE)+ +
| input | +(N,*) tensor, where * means, any number of additional +dimensions |
+
|---|---|
| inplace | +can optionally do the operation in-place. Default: FALSE |
+
$$ \mbox{Hardswish}(x) = \left\{ + \begin{array}{ll} + 0 & \mbox{if } x \le -3, \\ + x & \mbox{if } x \ge +3, \\ + x \cdot (x + 3)/6 & \mbox{otherwise} + \end{array} + \right. $$
+ +Applies the HardTanh function element-wise.
+nnf_hardtanh(input, min_val = -1, max_val = 1, inplace = FALSE) + +nnf_hardtanh_(input, min_val = -1, max_val = 1)+ +
| input | +(N,*) tensor, where * means, any number of additional +dimensions |
+
|---|---|
| min_val | +minimum value of the linear region range. Default: -1 |
+
| max_val | +maximum value of the linear region range. Default: 1 |
+
| inplace | +can optionally do the operation in-place. Default: FALSE |
+
Measures the loss given an input tensor x and a labels tensor y (containing 1 or -1). This is usually used for measuring whether two inputs are similar or dissimilar, e.g. using the L1 pairwise distance as x, and is typically used for learning nonlinear embeddings or semi-supervised learning.
+nnf_hinge_embedding_loss(input, target, margin = 1, reduction = "mean")+ +
| input | tensor (N,*) where * means any number of additional dimensions |
+
|---|---|
| target | +tensor (N,*) , same shape as the input |
+
| margin | +Has a default value of 1. |
+
| reduction | +(string, optional) – Specifies the reduction to apply to the +output: 'none' | 'mean' | 'sum'. 'none': no reduction will be applied, 'mean': +the sum of the output will be divided by the number of elements in the output, +'sum': the output will be summed. Default: 'mean' |
+
Applies Instance Normalization for each channel in each data sample in a +batch.
+nnf_instance_norm( + input, + running_mean = NULL, + running_var = NULL, + weight = NULL, + bias = NULL, + use_input_stats = TRUE, + momentum = 0.1, + eps = 1e-05 +)+ +
| input | +the input tensor |
+
|---|---|
| running_mean | +the running_mean tensor |
+
| running_var | +the running var tensor |
+
| weight | +the weight tensor |
+
| bias | +the bias tensor |
+
| use_input_stats | +whether to use input stats |
+
| momentum | +a double for the momentum |
+
| eps | +an eps double for numerical stability |
+
Down/up samples the input to either the given size or the given
+scale_factor
nnf_interpolate( + input, + size = NULL, + scale_factor = NULL, + mode = "nearest", + align_corners = FALSE, + recompute_scale_factor = NULL +)+ +
| input | +(Tensor) the input tensor |
+
|---|---|
| size | +(int or |
+
| scale_factor | +(float or |
+
| mode | +(str) algorithm used for upsampling: 'nearest' | 'linear' | 'bilinear' +| 'bicubic' | 'trilinear' | 'area' Default: 'nearest' |
+
| align_corners | +(bool, optional) Geometrically, we consider the pixels
+of the input and output as squares rather than points. If set to TRUE,
+the input and output tensors are aligned by the center points of their corner
+pixels, preserving the values at the corner pixels. If set to False, the
+input and output tensors are aligned by the corner points of their corner pixels,
+and the interpolation uses edge value padding for out-of-boundary values,
+making this operation independent of input size when |
+
| recompute_scale_factor | +(bool, optional) recompute the scale_factor
+for use in the interpolation calculation. When |
+
The algorithm used for interpolation is determined by mode.
Currently temporal, spatial and volumetric sampling are supported, i.e. +expected inputs are 3-D, 4-D or 5-D in shape.
+The input dimensions are interpreted in the form:
+mini-batch x channels x [optional depth] x [optional height] x width.
The modes available for resizing are: nearest, linear (3D-only),
+bilinear, bicubic (4D-only), trilinear (5D-only), area
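For example, doubling the spatial resolution of a 4-D (image-like) input:

if (torch_is_installed()) {
  x <- torch_randn(1, 3, 8, 8)
  nnf_interpolate(x, scale_factor = 2, mode = "nearest")$shape  # 1 x 3 x 16 x 16
}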
The Kullback-Leibler divergence Loss.
+nnf_kl_div(input, target, reduction = "mean")+ +
| input | tensor (N,*) where * means any number of additional dimensions |
+
|---|---|
| target | +tensor (N,*) , same shape as the input |
+
| reduction | +(string, optional) – Specifies the reduction to apply to the +output: 'none' | 'mean' | 'sum'. 'none': no reduction will be applied, 'mean': +the sum of the output will be divided by the number of elements in the output, +'sum': the output will be summed. Default: 'mean' |
+
Function that takes the mean element-wise absolute value difference.
+nnf_l1_loss(input, target, reduction = "mean")+ +
| input | tensor (N,*) where * means any number of additional dimensions |
+
|---|---|
| target | +tensor (N,*) , same shape as the input |
+
| reduction | +(string, optional) – Specifies the reduction to apply to the +output: 'none' | 'mean' | 'sum'. 'none': no reduction will be applied, 'mean': +the sum of the output will be divided by the number of elements in the output, +'sum': the output will be summed. Default: 'mean' |
+
Applies Layer Normalization for last certain number of dimensions.
+nnf_layer_norm( + input, + normalized_shape, + weight = NULL, + bias = NULL, + eps = 1e-05 +)+ +
| input | +the input tensor |
+
|---|---|
| normalized_shape | +input shape from an expected input of size. If a single +integer is used, it is treated as a singleton list, and this module will normalize +over the last dimension which is expected to be of that specific size. |
+
| weight | +the weight tensor |
+
| bias | +the bias tensor |
+
| eps | +a value added to the denominator for numerical stability. Default: 1e-5 |
+
Applies element-wise, +\(LeakyReLU(x) = max(0, x) + negative_slope * min(0, x)\)
+nnf_leaky_relu(input, negative_slope = 0.01, inplace = FALSE)+ +
| input | +(N,*) tensor, where * means, any number of additional +dimensions |
+
|---|---|
| negative_slope | +Controls the angle of the negative slope. Default: 1e-2 |
+
| inplace | +can optionally do the operation in-place. Default: FALSE |
+
Applies a linear transformation to the incoming data: \(y = xA^T + b\).
+nnf_linear(input, weight, bias = NULL)+ +
| input | +\((N, *, in\_features)\) where |
+
|---|---|
| weight | +\((out\_features, in\_features)\) the weights tensor. |
+
| bias | +optional tensor \((out\_features)\) |
+
Applies local response normalization over an input signal composed of +several input planes, where channels occupy the second dimension. +Applies normalization across channels.
+nnf_local_response_norm(input, size, alpha = 1e-04, beta = 0.75, k = 1)+ +
| input | +the input tensor |
+
|---|---|
| size | +amount of neighbouring channels used for normalization |
+
| alpha | +multiplicative factor. Default: 0.0001 |
+
| beta | +exponent. Default: 0.75 |
+
| k | +additive factor. Default: 1 |
+
Applies a softmax followed by a logarithm.
+nnf_log_softmax(input, dim = NULL, dtype = NULL)+ +
| input | +(Tensor) input |
+
|---|---|
| dim | +(int) A dimension along which log_softmax will be computed. |
+
| dtype | +( |
+
While mathematically equivalent to log(softmax(x)), doing these two +operations separately is slower, and numerically unstable. This function +uses an alternative formulation to compute the output and gradient correctly.
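A sketch of the numerical-stability point: with large logits the naive composition can underflow to -Inf, while the fused version stays finite.

if (torch_is_installed()) {
  x <- torch_randn(2, 5) * 100
  naive  <- torch_log(nnf_softmax(x, dim = 2))
  stable <- nnf_log_softmax(x, dim = 2)
  c(naive$min()$item(), stable$min()$item())
}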
+ +Applies a 1D power-average pooling over an input signal composed of
+several input planes. If the sum of all inputs to the power of p is
+zero, the gradient is set to zero as well.
nnf_lp_pool1d(input, norm_type, kernel_size, stride = NULL, ceil_mode = FALSE)+ +
| input | +the input tensor |
+
|---|---|
| norm_type | if p = Inf one gets max pooling; if p = 1 one gets sum pooling (which is proportional to average pooling) |
+
| kernel_size | +a single int, the size of the window |
+
| stride | +a single int, the stride of the window. Default value is kernel_size |
+
| ceil_mode | +when True, will use ceil instead of floor to compute the output shape |
+
Applies a 2D power-average pooling over an input signal composed of
+several input planes. If the sum of all inputs to the power of p is
+zero, the gradient is set to zero as well.
nnf_lp_pool2d(input, norm_type, kernel_size, stride = NULL, ceil_mode = FALSE)+ +
| input | +the input tensor |
+
|---|---|
| norm_type | if p = Inf one gets max pooling; if p = 1 one gets sum pooling (which is proportional to average pooling) |
+
| kernel_size | +a single int, the size of the window |
+
| stride | +a single int, the stride of the window. Default value is kernel_size |
+
| ceil_mode | +when True, will use ceil instead of floor to compute the output shape |
+
Creates a criterion that measures the loss given inputs x1 , x2 , two 1D +mini-batch Tensors, and a label 1D mini-batch tensor y (containing 1 or -1).
+nnf_margin_ranking_loss(input1, input2, target, margin = 0, reduction = "mean")+ +
| input1 | +the first tensor |
+
|---|---|
| input2 | +the second input tensor |
+
| target | +the target tensor |
+
| margin | Has a default value of 0. |
+
| reduction | +(string, optional) – Specifies the reduction to apply to the +output: 'none' | 'mean' | 'sum'. 'none': no reduction will be applied, 'mean': +the sum of the output will be divided by the number of elements in the output, +'sum': the output will be summed. Default: 'mean' |
+
Applies a 1D max pooling over an input signal composed of several input +planes.
+nnf_max_pool1d( + input, + kernel_size, + stride = NULL, + padding = 0, + dilation = 1, + ceil_mode = FALSE, + return_indices = FALSE +)+ +
| input | +input tensor of shape (minibatch , in_channels , iW) |
+
|---|---|
| kernel_size | +the size of the window. Can be a single number or a
+tuple |
+
| stride | +the stride of the window. Can be a single number or a tuple
+ |
+
| padding | +implicit zero paddings on both sides of the input. Can be a
+single number or a tuple |
+
| dilation | +controls the spacing between the kernel points; also known as +the à trous algorithm. |
+
| ceil_mode | +when True, will use |
+
| return_indices | +whether to return the indices where the max occurs. |
+
Applies a 2D max pooling over an input signal composed of several input +planes.
+nnf_max_pool2d( + input, + kernel_size, + stride = kernel_size, + padding = 0, + dilation = 1, + ceil_mode = FALSE, + return_indices = FALSE +)+ +
| input | +input tensor (minibatch, in_channels , iH , iW) |
+
|---|---|
| kernel_size | +size of the pooling region. Can be a single number or a
+tuple |
+
| stride | +stride of the pooling operation. Can be a single number or a
+tuple |
+
| padding | +implicit zero paddings on both sides of the input. Can be a
+single number or a tuple |
+
| dilation | +controls the spacing between the kernel points; also known as +the à trous algorithm. |
+
| ceil_mode | +when True, will use |
+
| return_indices | +whether to return the indices where the max occurs. |
+
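An illustrative sketch with an arbitrary input shape:

if (torch_is_installed()) {
  x <- torch_randn(1, 1, 4, 4)
  nnf_max_pool2d(x, kernel_size = 2)$shape  # 1 x 1 x 2 x 2: non-overlapping 2 x 2 windows
}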
Applies a 3D max pooling over an input signal composed of several input +planes.
+nnf_max_pool3d( + input, + kernel_size, + stride = NULL, + padding = 0, + dilation = 1, + ceil_mode = FALSE, + return_indices = FALSE +)+ +
| input | input tensor (minibatch, in_channels, iT, iH, iW) |
+
|---|---|
| kernel_size | +size of the pooling region. Can be a single number or a
+tuple |
+
| stride | +stride of the pooling operation. Can be a single number or a
+tuple |
+
| padding | +implicit zero paddings on both sides of the input. Can be a
+single number or a tuple |
+
| dilation | +controls the spacing between the kernel points; also known as +the à trous algorithm. |
+
| ceil_mode | +when True, will use |
+
| return_indices | +whether to return the indices where the max occurs. |
+
Computes a partial inverse of MaxPool1d.
nnf_max_unpool1d( + input, + indices, + kernel_size, + stride = NULL, + padding = 0, + output_size = NULL +)+ +
| input | +the input Tensor to invert |
+
|---|---|
| indices | +the indices given out by max pool |
+
| kernel_size | +Size of the max pooling window. |
+
| stride | +Stride of the max pooling window. It is set to kernel_size by default. |
+
| padding | +Padding that was added to the input |
+
| output_size | +the targeted output size |
+
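A sketch of pooling followed by unpooling; it assumes that, with return_indices = TRUE, the pooling function returns the values and the indices as a two-element list:

if (torch_is_installed()) {
  x <- torch_tensor(c(1, 3, 2, 8, 5, 6, 7, 4))$view(c(1, 1, 8))
  pooled <- nnf_max_pool1d(x, kernel_size = 2, return_indices = TRUE)
  # pooled[[1]] holds the max values, pooled[[2]] the positions they came from
  nnf_max_unpool1d(pooled[[1]], pooled[[2]], kernel_size = 2)
}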
Computes a partial inverse of MaxPool2d.
nnf_max_unpool2d( + input, + indices, + kernel_size, + stride = NULL, + padding = 0, + output_size = NULL +)+ +
| input | +the input Tensor to invert |
+
|---|---|
| indices | +the indices given out by max pool |
+
| kernel_size | +Size of the max pooling window. |
+
| stride | +Stride of the max pooling window. It is set to kernel_size by default. |
+
| padding | +Padding that was added to the input |
+
| output_size | +the targeted output size |
+
Computes a partial inverse of MaxPool3d.
nnf_max_unpool3d( + input, + indices, + kernel_size, + stride = NULL, + padding = 0, + output_size = NULL +)+ +
| input | +the input Tensor to invert |
+
|---|---|
| indices | +the indices given out by max pool |
+
| kernel_size | +Size of the max pooling window. |
+
| stride | +Stride of the max pooling window. It is set to kernel_size by default. |
+
| padding | +Padding that was added to the input |
+
| output_size | +the targeted output size |
+
Measures the element-wise mean squared error.
+nnf_mse_loss(input, target, reduction = "mean")+ +
| input | tensor (N,*) where * means any number of additional dimensions |
+
|---|---|
| target | +tensor (N,*) , same shape as the input |
+
| reduction | +(string, optional) – Specifies the reduction to apply to the +output: 'none' | 'mean' | 'sum'. 'none': no reduction will be applied, 'mean': +the sum of the output will be divided by the number of elements in the output, +'sum': the output will be summed. Default: 'mean' |
+
Allows the model to jointly attend to information from different representation +subspaces. See reference: Attention Is All You Need
+nnf_multi_head_attention_forward( + query, + key, + value, + embed_dim_to_check, + num_heads, + in_proj_weight, + in_proj_bias, + bias_k, + bias_v, + add_zero_attn, + dropout_p, + out_proj_weight, + out_proj_bias, + training = TRUE, + key_padding_mask = NULL, + need_weights = TRUE, + attn_mask = NULL, + use_separate_proj_weight = FALSE, + q_proj_weight = NULL, + k_proj_weight = NULL, + v_proj_weight = NULL, + static_k = NULL, + static_v = NULL +)+ +
| query | +\((L, N, E)\) where L is the target sequence length, N is the batch size, E is +the embedding dimension. |
+
|---|---|
| key | +\((S, N, E)\), where S is the source sequence length, N is the batch size, E is +the embedding dimension. |
+
| value | +\((S, N, E)\) where S is the source sequence length, N is the batch size, E is +the embedding dimension. |
+
| embed_dim_to_check | +total dimension of the model. |
+
| num_heads | +parallel attention heads. |
+
| in_proj_weight | +input projection weight and bias. |
+
| in_proj_bias | +currently undocumented. |
+
| bias_k | +bias of the key and value sequences to be added at dim=0. |
+
| bias_v | +currently undocumented. |
+
| add_zero_attn | +add a new batch of zeros to the key and +value sequences at dim=1. |
+
| dropout_p | +probability of an element to be zeroed. |
+
| out_proj_weight | +the output projection weight and bias. |
+
| out_proj_bias | +currently undocumented. |
+
| training | apply dropout if TRUE. Default: TRUE |
+
| key_padding_mask | \((N, S)\) where N is the batch size, S is the source sequence length. If a ByteTensor is provided, the non-zero positions will be ignored while the zero positions will be unchanged. If a BoolTensor is provided, the positions with the value of TRUE will be ignored while the positions with the value of FALSE will be unchanged. |
+
| need_weights | +output attn_output_weights. |
+
| attn_mask | 2D mask \((L, S)\) where L is the target sequence length, S is the source sequence length. 3D mask \((N*num_heads, L, S)\) where N is the batch size, L is the target sequence length, S is the source sequence length. attn_mask ensures that position i is allowed to attend the unmasked positions. If a ByteTensor is provided, the non-zero positions are not allowed to attend while the zero positions will be unchanged. If a BoolTensor is provided, positions with TRUE are not allowed to attend while FALSE values will be unchanged. |
+
| use_separate_proj_weight | the function accepts the projection weights for query, key, and value in different forms. If FALSE, in_proj_weight will be used, which is a combination of q_proj_weight, k_proj_weight, v_proj_weight. |
+
| q_proj_weight | +input projection weight and bias. |
+
| k_proj_weight | +currently undocumented. |
+
| v_proj_weight | +currently undocumented. |
+
| static_k | +static key and value used for attention operators. |
+
| static_v | +currently undocumented. |
+
Creates a criterion that optimizes a multi-class classification hinge loss
+(margin-based loss) between input x (a 2D mini-batch Tensor) and output y
+(which is a 1D tensor of target class indices, 0 <= y <= x$size(2) - 1 ).
nnf_multi_margin_loss( + input, + target, + p = 1, + margin = 1, + weight = NULL, + reduction = "mean" +)+ +
| input | tensor (N,*) where * means any number of additional dimensions |
+
|---|---|
| target | +tensor (N,*) , same shape as the input |
+
| p | +Has a default value of 1. 1 and 2 are the only supported values. |
+
| margin | +Has a default value of 1. |
+
| weight | +a manual rescaling weight given to each class. If given, it has to +be a Tensor of size C. Otherwise, it is treated as if having all ones. |
+
| reduction | +(string, optional) – Specifies the reduction to apply to the +output: 'none' | 'mean' | 'sum'. 'none': no reduction will be applied, 'mean': +the sum of the output will be divided by the number of elements in the output, +'sum': the output will be summed. Default: 'mean' |
+
Creates a criterion that optimizes a multi-class multi-classification hinge loss +(margin-based loss) between input x (a 2D mini-batch Tensor) and output y (which +is a 2D Tensor of target class indices).
+nnf_multilabel_margin_loss(input, target, reduction = "mean")+ +
| input | tensor (N,*) where * means any number of additional dimensions |
+
|---|---|
| target | +tensor (N,*) , same shape as the input |
+
| reduction | +(string, optional) – Specifies the reduction to apply to the +output: 'none' | 'mean' | 'sum'. 'none': no reduction will be applied, 'mean': +the sum of the output will be divided by the number of elements in the output, +'sum': the output will be summed. Default: 'mean' |
+
Creates a criterion that optimizes a multi-label one-versus-all loss based on +max-entropy, between input x and target y of size (N, C).
+nnf_multilabel_soft_margin_loss(input, target, weight, reduction = "mean")+ +
| input | tensor (N,*) where * means any number of additional dimensions |
+
|---|---|
| target | +tensor (N,*) , same shape as the input |
+
| weight | +weight tensor to apply on the loss. |
+
| reduction | +(string, optional) – Specifies the reduction to apply to the +output: 'none' | 'mean' | 'sum'. 'none': no reduction will be applied, 'mean': +the sum of the output will be divided by the number of elements in the output, +'sum': the output will be summed. Default: 'mean' |
+
The negative log likelihood loss.
+nnf_nll_loss( + input, + target, + weight = NULL, + ignore_index = -100, + reduction = "mean" +)+ +
| input | \((N, C)\) where C = number of classes. |
+
|---|---|
| target | +\((N)\) where each value is \(0 \leq \mbox{targets}[i] \leq C-1\), +or \((N, d_1, d_2, ..., d_K)\) where \(K \geq 1\) for K-dimensional loss. |
+
| weight | +(Tensor, optional) a manual rescaling weight given to each class.
+If given, has to be a Tensor of size |
+
| ignore_index | +(int, optional) Specifies a target value that is ignored and +does not contribute to the input gradient. |
+
| reduction | +(string, optional) – Specifies the reduction to apply to the +output: 'none' | 'mean' | 'sum'. 'none': no reduction will be applied, 'mean': +the sum of the output will be divided by the number of elements in the output, +'sum': the output will be summed. Default: 'mean' |
+
Performs \(L_p\) normalization of inputs over specified dimension.
+nnf_normalize(input, p = 2, dim = 1, eps = 1e-12, out = NULL)+ +
| input | +input tensor of any shape |
+
|---|---|
| p | +(float) the exponent value in the norm formulation. Default: 2 |
+
| dim | +(int) the dimension to reduce. Default: 1 |
+
| eps | +(float) small value to avoid division by zero. Default: 1e-12 |
+
| out | +(Tensor, optional) the output tensor. If |
+
For a tensor input of sizes \((n_0, ..., n_{dim}, ..., n_k)\), each
+\(n_{dim}\) -element vector \(v\) along dimension dim is transformed as
$$ + v = \frac{v}{\max(\Vert v \Vert_p, \epsilon)}. +$$
+With the default arguments it uses the Euclidean norm over vectors along +dimension \(1\) for normalization.
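For example, scaling each row of a matrix to unit Euclidean norm (dimension indices are 1-based in the R API):

if (torch_is_installed()) {
  x <- torch_randn(3, 4)
  y <- nnf_normalize(x, p = 2, dim = 2)
  torch_sum(y * y, dim = 2)  # all ones
}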
+ +Takes LongTensor with index values of shape (*) and returns a tensor
+of shape (*, num_classes) that have zeros everywhere except where the
+index of last dimension matches the corresponding value of the input tensor,
+in which case it will be 1.
nnf_one_hot(tensor, num_classes = -1)+ +
| tensor | +(LongTensor) class values of any shape. |
+
|---|---|
| num_classes | +(int) Total number of classes. If set to -1, the number +of classes will be inferred as one greater than the largest class value in +the input tensor. |
+
One-hot on Wikipedia: https://en.wikipedia.org/wiki/One-hot
+ +Pads tensor.
+nnf_pad(input, pad, mode = "constant", value = 0)+ +
| input | +(Tensor) N-dimensional tensor |
+
|---|---|
| pad | +(tuple) m-elements tuple, where \(\frac{m}{2} \leq\) input dimensions +and \(m\) is even. |
+
| mode | +'constant', 'reflect', 'replicate' or 'circular'. Default: 'constant' |
+
| value | +fill value for 'constant' padding. Default: 0. |
+
The padding size by which to pad some dimensions of input
is described starting from the last dimension and moving forward.
+\(\left\lfloor\frac{\mbox{len(pad)}}{2}\right\rfloor\) dimensions
+of input will be padded.
+For example, to pad only the last dimension of the input tensor, then
+pad has the form
+\((\mbox{padding\_left}, \mbox{padding\_right})\);
+to pad the last 2 dimensions of the input tensor, then use
+\((\mbox{padding\_left}, \mbox{padding\_right},\)
+\(\mbox{padding\_top}, \mbox{padding\_bottom})\);
+to pad the last 3 dimensions, use
+\((\mbox{padding\_left}, \mbox{padding\_right},\)
+\(\mbox{padding\_top}, \mbox{padding\_bottom}\)
+\(\mbox{padding\_front}, \mbox{padding\_back})\).
See nn_constant_pad_2d, nn_reflection_pad_2d, and nn_replication_pad_2d for concrete examples on how each of the padding modes works. Constant padding is implemented for arbitrary dimensions. Replicate padding is implemented for padding the last 3 dimensions of a 5D input tensor, the last 2 dimensions of a 4D input tensor, or the last dimension of a 3D input tensor. Reflect padding is only implemented for padding the last 2 dimensions of a 4D input tensor, or the last dimension of a 3D input tensor.
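An illustrative sketch of both padding forms:

if (torch_is_installed()) {
  x <- torch_randn(1, 3, 4, 4)
  nnf_pad(x, pad = c(1, 1))$shape        # 1 x 3 x 4 x 6: last dimension padded by 1 on each side
  nnf_pad(x, pad = c(1, 1, 2, 2))$shape  # 1 x 3 x 8 x 6: (left, right, top, bottom)
}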
Computes the batchwise pairwise distance between vectors using the p-norm.
+nnf_pairwise_distance(x1, x2, p = 2, eps = 1e-06, keepdim = FALSE)+ +
| x1 | +(Tensor) First input. |
+
|---|---|
| x2 | +(Tensor) Second input (of size matching x1). |
+
| p | +the norm degree. Default: 2 |
+
| eps | (float, optional) Small value to avoid division by zero. Default: 1e-6 |
+
| keepdim | +Determines whether or not to keep the vector dimension. Default: False |
+
Computes the p-norm distance between every pair of row vectors in the input.
+This is identical to the upper triangular portion, excluding the diagonal, of
+torch_norm(input[:, None] - input, dim=2, p=p). This function will be faster
+if the rows are contiguous.
nnf_pdist(input, p = 2)+ +
| input | +input tensor of shape \(N \times M\). |
+
|---|---|
| p | +p value for the p-norm distance to calculate between each vector pair +\(\in [0, \infty]\). |
+
If input has shape \(N \times M\) then the output will have shape +\(\frac{1}{2} N (N - 1)\).
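An illustrative sketch of the output size:

if (torch_is_installed()) {
  x <- torch_randn(5, 3)
  nnf_pdist(x, p = 2)$shape  # 10 = 5 * 4 / 2 pairwise distances
}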
+ +Rearranges elements in a tensor of shape \((*, C \times r^2, H, W)\) to a +tensor of shape \((*, C, H \times r, W \times r)\).
+nnf_pixel_shuffle(input, upscale_factor)+ +
| input | +(Tensor) the input tensor |
+
|---|---|
| upscale_factor | +(int) factor to increase spatial resolution by |
+
Poisson negative log likelihood loss.
+nnf_poisson_nll_loss( + input, + target, + log_input = TRUE, + full = FALSE, + eps = 1e-08, + reduction = "mean" +)+ +
| input | tensor (N,*) where * means any number of additional dimensions |
+
|---|---|
| target | +tensor (N,*) , same shape as the input |
+
| log_input | +if |
+
| full | +whether to compute full loss, i. e. to add the Stirling approximation
+term. Default: |
+
| eps | +(float, optional) Small value to avoid evaluation of \(\log(0)\) when
+ |
+
| reduction | +(string, optional) – Specifies the reduction to apply to the +output: 'none' | 'mean' | 'sum'. 'none': no reduction will be applied, 'mean': +the sum of the output will be divided by the number of elements in the output, +'sum': the output will be summed. Default: 'mean' |
+
Applies element-wise the function +\(PReLU(x) = max(0,x) + weight * min(0,x)\) +where weight is a learnable parameter.
+nnf_prelu(input, weight)+ +
| input | +(N,*) tensor, where * means, any number of additional +dimensions |
+
|---|---|
| weight | +(Tensor) the learnable weights |
+
Applies the rectified linear unit function element-wise.
+nnf_relu(input, inplace = FALSE) + +nnf_relu_(input)+ +
| input | +(N,*) tensor, where * means, any number of additional +dimensions |
+
|---|---|
| inplace | +can optionally do the operation in-place. Default: FALSE |
+
Randomized leaky ReLU.
+nnf_rrelu(input, lower = 1/8, upper = 1/3, training = FALSE, inplace = FALSE) + +nnf_rrelu_(input, lower = 1/8, upper = 1/3, training = FALSE)+ +
| input | +(N,*) tensor, where * means, any number of additional +dimensions |
+
|---|---|
| lower | +lower bound of the uniform distribution. Default: 1/8 |
+
| upper | +upper bound of the uniform distribution. Default: 1/3 |
+
| training | bool, whether it's a training pass. Default: FALSE |
+
| inplace | +can optionally do the operation in-place. Default: FALSE |
+
Applies element-wise, +$$SELU(x) = scale * (max(0,x) + min(0, \alpha * (exp(x) - 1)))$$, +with \(\alpha=1.6732632423543772848170429916717\) and +\(scale=1.0507009873554804934193349852946\).
+nnf_selu(input, inplace = FALSE) + +nnf_selu_(input)+ +
| input | +(N,*) tensor, where * means, any number of additional +dimensions |
+
|---|---|
| inplace | +can optionally do the operation in-place. Default: FALSE |
+
+if (torch_is_installed()) { +x <- torch_randn(2, 2) +y <- nnf_selu(x) +nnf_selu_(x) +torch_equal(x, y) + +}
Function that uses a squared term if the absolute +element-wise error falls below 1 and an L1 term otherwise.
+nnf_smooth_l1_loss(input, target, reduction = "mean")+ +
| input | tensor (N,*) where * means any number of additional dimensions |
+
|---|---|
| target | +tensor (N,*) , same shape as the input |
+
| reduction | +(string, optional) – Specifies the reduction to apply to the +output: 'none' | 'mean' | 'sum'. 'none': no reduction will be applied, 'mean': +the sum of the output will be divided by the number of elements in the output, +'sum': the output will be summed. Default: 'mean' |
+
Creates a criterion that optimizes a two-class classification logistic loss +between input tensor x and target tensor y (containing 1 or -1).
+nnf_soft_margin_loss(input, target, reduction = "mean")+ +
| input | tensor (N,*) where * means any number of additional dimensions |
+
|---|---|
| target | +tensor (N,*) , same shape as the input |
+
| reduction | +(string, optional) – Specifies the reduction to apply to the +output: 'none' | 'mean' | 'sum'. 'none': no reduction will be applied, 'mean': +the sum of the output will be divided by the number of elements in the output, +'sum': the output will be summed. Default: 'mean' |
+
Applies a softmax function.
+nnf_softmax(input, dim, dtype = NULL)+ +
| input | +(Tensor) input |
+
|---|---|
| dim | +(int) A dimension along which softmax will be computed. |
+
| dtype | +( |
+
Softmax is defined as:
+$$Softmax(x_{i}) = exp(x_i)/\sum_j exp(x_j)$$
+It is applied to all slices along dim, and will re-scale them so that the elements
+lie in the range [0, 1] and sum to 1.
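An illustrative sketch (dimension indices are 1-based in the R API):

if (torch_is_installed()) {
  x <- torch_randn(2, 4)
  probs <- nnf_softmax(x, dim = 2)
  probs$sum(dim = 2)  # each row sums to 1
}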
Applies a softmin function.
+nnf_softmin(input, dim, dtype = NULL)+ +
| input | +(Tensor) input |
+
|---|---|
| dim | +(int) A dimension along which softmin will be computed +(so every slice along dim will sum to 1). |
+
| dtype | +( |
+
Note that
+$$Softmin(x) = Softmax(-x)$$.
+See nnf_softmax definition for mathematical formula.
+ +Applies element-wise, the function \(Softplus(x) = 1/\beta * log(1 + exp(\beta * x))\).
+nnf_softplus(input, beta = 1, threshold = 20)+ +
| input | +(N,*) tensor, where * means, any number of additional +dimensions |
+
|---|---|
| beta | +the beta value for the Softplus formulation. Default: 1 |
+
| threshold | +values above this revert to a linear function. Default: 20 |
+
For numerical stability the implementation reverts to the linear function +when \(input * \beta > threshold\).
+ +Applies the soft shrinkage function elementwise
+nnf_softshrink(input, lambd = 0.5)+ +
| input | +(N,*) tensor, where * means, any number of additional +dimensions |
+
|---|---|
| lambd | +the lambda (must be no less than zero) value for the Softshrink +formulation. Default: 0.5 |
+
Thresholds each element of the input Tensor.
+nnf_threshold(input, threshold, value, inplace = FALSE) + +nnf_threshold_(input, threshold, value)+ +
| input | +(N,*) tensor, where * means, any number of additional +dimensions |
+
|---|---|
| threshold | +The value to threshold at |
+
| value | +The value to replace with |
+
| inplace | +can optionally do the operation in-place. Default: FALSE |
+
Creates a criterion that measures the triplet loss given input tensors x1, x2, x3 and a margin with a value greater than 0. This is used for measuring a relative similarity between samples. A triplet is composed of a, p and n (i.e., anchor, positive example and negative example, respectively). The shapes of all input tensors should be (N, D).
+nnf_triplet_margin_loss( + anchor, + positive, + negative, + margin = 1, + p = 2, + eps = 1e-06, + swap = FALSE, + reduction = "mean" +)+ +
| anchor | +the anchor input tensor |
+
|---|---|
| positive | +the positive input tensor |
+
| negative | +the negative input tensor |
+
| margin | +Default: 1. |
+
| p | +The norm degree for pairwise distance. Default: 2. |
+
| eps | +(float, optional) Small value to avoid division by zero. |
+
| swap | +The distance swap is described in detail in the paper Learning shallow
+convolutional feature descriptors with triplet losses by V. Balntas, E. Riba et al.
+Default: |
+
| reduction | +(string, optional) – Specifies the reduction to apply to the +output: 'none' | 'mean' | 'sum'. 'none': no reduction will be applied, 'mean': +the sum of the output will be divided by the number of elements in the output, +'sum': the output will be summed. Default: 'mean' |
+
Extracts sliding local blocks from an batched input tensor.
+nnf_unfold(input, kernel_size, dilation = 1, padding = 0, stride = 1)+ +
| input | +the input tensor |
+
|---|---|
| kernel_size | +the size of the sliding blocks |
+
| dilation | +a parameter that controls the stride of elements within the +neighborhood. Default: 1 |
+
| padding | +implicit zero padding to be added on both sides of input. +Default: 0 |
+
| stride | +the stride of the sliding blocks in the input spatial dimensions. +Default: 1 |
+
Currently, only 4-D input tensors (batched image-like tensors) are +supported.
+ + +More than one element of the unfolded tensor may refer to a single +memory location. As a result, in-place operations (especially ones that +are vectorized) may result in incorrect behavior. If you need to write +to the tensor, please clone it first.
+ +It has been proposed in Adam: A Method for Stochastic Optimization.
+optim_adam( + params, + lr = 0.001, + betas = c(0.9, 0.999), + eps = 1e-08, + weight_decay = 0, + amsgrad = FALSE +)+ +
| params | +(iterable): iterable of parameters to optimize or dicts defining +parameter groups |
+
|---|---|
| lr | +(float, optional): learning rate (default: 1e-3) |
+
| betas | +( |
+
| eps | +(float, optional): term added to the denominator to improve +numerical stability (default: 1e-8) |
+
| weight_decay | +(float, optional): weight decay (L2 penalty) (default: 0) |
+
| amsgrad | +(boolean, optional): whether to use the AMSGrad variant of this +algorithm from the paper On the Convergence of Adam and Beyond +(default: FALSE) |
+
+if (torch_is_installed()) { +if (FALSE) { +optimizer <- optim_adam(model$parameters(), lr=0.1) +optimizer$zero_grad() +loss_fn(model(input), target)$backward() +optimizer$step() +} + +}
+optim_required()
+
+
+
+ Implements stochastic gradient descent (optionally with momentum). +Nesterov momentum is based on the formula from +On the importance of initialization and momentum in deep learning.
+optim_sgd( + params, + lr = optim_required(), + momentum = 0, + dampening = 0, + weight_decay = 0, + nesterov = FALSE +)+ +
| params | +(iterable): iterable of parameters to optimize or dicts defining +parameter groups |
+
|---|---|
| lr | +(float): learning rate |
+
| momentum | +(float, optional): momentum factor (default: 0) |
+
| dampening | +(float, optional): dampening for momentum (default: 0) |
+
| weight_decay | +(float, optional): weight decay (L2 penalty) (default: 0) |
+
| nesterov | +(bool, optional): enables Nesterov momentum (default: FALSE) |
+
The implementation of SGD with Momentum-Nesterov subtly differs from +Sutskever et. al. and implementations in some other frameworks.
+Considering the specific case of Momentum, the update can be written as +$$ + \begin{array}{ll} +v_{t+1} & = \mu * v_{t} + g_{t+1}, \\ +p_{t+1} & = p_{t} - \mbox{lr} * v_{t+1}, +\end{array} +$$
+where \(p\), \(g\), \(v\) and \(\mu\) denote the +parameters, gradient, velocity, and momentum respectively.
+This is in contrast to Sutskever et. al. and +other frameworks which employ an update of the form
+$$ + \begin{array}{ll} +v_{t+1} & = \mu * v_{t} + \mbox{lr} * g_{t+1}, \\ +p_{t+1} & = p_{t} - v_{t+1}. +\end{array} +$$ +The Nesterov version is analogously modified.
+ ++if (torch_is_installed()) { +if (FALSE) { +optimizer <- optim_sgd(model$parameters(), lr=0.1, momentum=0.9) +optimizer$zero_grad() +loss_fn(model(input), target)$backward() +optimizer$step() +} + +}
Abs
+| input | +(Tensor) the input tensor. |
+
|---|---|
| out | +(Tensor, optional) the output tensor. |
+
Computes the element-wise absolute value of the given input tensor.
$$ + \mbox{out}_{i} = |\mbox{input}_{i}| +$$
+ ++
Acos
+| input | +(Tensor) the input tensor. |
+
|---|---|
| out | +(Tensor, optional) the output tensor. |
+
Returns a new tensor with the arccosine of the elements of input.
$$ + \mbox{out}_{i} = \cos^{-1}(\mbox{input}_{i}) +$$
+ ++
Adaptive_avg_pool1d
| output_size | the target output size (single integer) |
+
|---|
Applies a 1D adaptive average pooling over an input signal composed of +several input planes.
See nn_adaptive_avg_pool1d() for details and output shape.
Add
+| input | +(Tensor) the input tensor. |
+
|---|---|
| value | +(Number) the number to be added to each element of |
+
| other | +(Tensor) the second input tensor |
+
| alpha | +(Number) the scalar multiplier for |
+
Adds the scalar other to each element of the input input
+and returns a new resulting tensor.
$$
+ \mbox{out} = \mbox{input} + \mbox{other}
+$$
+If input is of type FloatTensor or DoubleTensor, other must be
+a real number, otherwise it should be an integer.
Each element of the tensor other is multiplied by the scalar
+alpha and added to each element of the tensor input.
+The resulting tensor is returned.
The shapes of input and other must be
+broadcastable .
$$
+ \mbox{out} = \mbox{input} + \mbox{alpha} \times \mbox{other}
+$$
+If other is of type FloatTensor or DoubleTensor, alpha must be
+a real number, otherwise it should be an integer.
+if (torch_is_installed()) { + +a = torch_randn(c(4)) +a +torch_add(a, 20) + + +a = torch_randn(c(4)) +a +b = torch_randn(c(4, 1)) +b +torch_add(a, b) +}
Addbmm
+| batch1 | +(Tensor) the first batch of matrices to be multiplied |
+
|---|---|
| batch2 | +(Tensor) the second batch of matrices to be multiplied |
+
| beta | +(Number, optional) multiplier for |
+
| input | +(Tensor) matrix to be added |
+
| alpha | +(Number, optional) multiplier for |
+
| out | +(Tensor, optional) the output tensor. |
+
Performs a batch matrix-matrix product of matrices stored
+in batch1 and batch2,
+with a reduced add step (all matrix multiplications get accumulated
+along the first dimension).
+input is added to the final result.
batch1 and batch2 must be 3-D tensors each containing the
+same number of matrices.
If batch1 is a \((b \times n \times m)\) tensor, batch2 is a
+\((b \times m \times p)\) tensor, input must be
+broadcastable with a \((n \times p)\) tensor
+and out will be a \((n \times p)\) tensor.
$$
+ out = \beta\ \mbox{input} + \alpha\ (\sum_{i=0}^{b-1} \mbox{batch1}_i \mathbin{@} \mbox{batch2}_i)
+$$
+For inputs of type FloatTensor or DoubleTensor, arguments beta and alpha
+must be real numbers, otherwise they should be integers.
+if (torch_is_installed()) { + +M = torch_randn(c(3, 5)) +batch1 = torch_randn(c(10, 3, 4)) +batch2 = torch_randn(c(10, 4, 5)) +torch_addbmm(M, batch1, batch2) +}
Addcdiv
+| input | +(Tensor) the tensor to be added |
+
|---|---|
| tensor1 | +(Tensor) the numerator tensor |
+
| tensor2 | +(Tensor) the denominator tensor |
+
| value | +(Number, optional) multiplier for \(\mbox{tensor1} / \mbox{tensor2}\) |
+
| out | +(Tensor, optional) the output tensor. |
+
Performs the element-wise division of tensor1 by tensor2,
+multiply the result by the scalar value and add it to input.
Integer division with addcdiv is deprecated, and in a future release
+addcdiv will perform a true division of tensor1 and tensor2.
+The current addcdiv behavior can be replicated using torch_floor_divide()
+for integral inputs
+(input + value * tensor1 // tensor2)
+and torch_div() for float inputs
+(input + value * tensor1 / tensor2).
+The new addcdiv behavior can be implemented with torch_true_divide()
+(input + value * torch.true_divide(tensor1,
+tensor2).
$$ + \mbox{out}_i = \mbox{input}_i + \mbox{value} \times \frac{\mbox{tensor1}_i}{\mbox{tensor2}_i} +$$
+The shapes of input, tensor1, and tensor2 must be
+broadcastable .
For inputs of type FloatTensor or DoubleTensor, value must be
+a real number, otherwise an integer.
+if (torch_is_installed()) { + +t = torch_randn(c(1, 3)) +t1 = torch_randn(c(3, 1)) +t2 = torch_randn(c(1, 3)) +torch_addcdiv(t, t1, t2, 0.1) +}
Addcmul
+| input | +(Tensor) the tensor to be added |
+
|---|---|
| tensor1 | +(Tensor) the tensor to be multiplied |
+
| tensor2 | +(Tensor) the tensor to be multiplied |
+
| value | +(Number, optional) multiplier for \(tensor1 .* tensor2\) |
+
| out | +(Tensor, optional) the output tensor. |
+
Performs the element-wise multiplication of tensor1
+by tensor2, multiply the result by the scalar value
+and add it to input.
$$
+ \mbox{out}_i = \mbox{input}_i + \mbox{value} \times \mbox{tensor1}_i \times \mbox{tensor2}_i
+$$
The shapes of input, tensor1, and tensor2 must be
broadcastable.
For inputs of type FloatTensor or DoubleTensor, value must be
+a real number, otherwise an integer.
if (torch_is_installed()) {

t = torch_randn(c(1, 3))
t1 = torch_randn(c(3, 1))
t2 = torch_randn(c(1, 3))
torch_addcmul(t, t1, t2, 0.1)
}
Addmm
+| input | +(Tensor) matrix to be added |
+
|---|---|
| mat1 | +(Tensor) the first matrix to be multiplied |
+
| mat2 | +(Tensor) the second matrix to be multiplied |
+
| beta | +(Number, optional) multiplier for |
+
| alpha | +(Number, optional) multiplier for \(mat1 @ mat2\) (\(\alpha\)) |
+
| out | +(Tensor, optional) the output tensor. |
+
Performs a matrix multiplication of the matrices mat1 and mat2.
+The matrix input is added to the final result.
If mat1 is a \((n \times m)\) tensor, mat2 is a
+\((m \times p)\) tensor, then input must be
+broadcastable with a \((n \times p)\) tensor
+and out will be a \((n \times p)\) tensor.
alpha and beta are scaling factors on the matrix-matrix product between
mat1 and mat2 and the added matrix input respectively.
$$
 \mbox{out} = \beta\ \mbox{input} + \alpha\ (\mbox{mat1} \mathbin{@} \mbox{mat2})
$$
+For inputs of type FloatTensor or DoubleTensor, arguments beta and
+alpha must be real numbers, otherwise they should be integers.
if (torch_is_installed()) {

M = torch_randn(c(2, 3))
mat1 = torch_randn(c(2, 3))
mat2 = torch_randn(c(3, 3))
torch_addmm(M, mat1, mat2)
}
Addmv
+| input | +(Tensor) vector to be added |
+
|---|---|
| mat | +(Tensor) matrix to be multiplied |
+
| vec | +(Tensor) vector to be multiplied |
+
| beta | +(Number, optional) multiplier for |
+
| alpha | +(Number, optional) multiplier for \(mat @ vec\) (\(\alpha\)) |
+
| out | +(Tensor, optional) the output tensor. |
+
Performs a matrix-vector product of the matrix mat and
+the vector vec.
+The vector input is added to the final result.
If mat is a \((n \times m)\) tensor, vec is a 1-D tensor of
+size m, then input must be
+broadcastable with a 1-D tensor of size n and
+out will be 1-D tensor of size n.
alpha and beta are scaling factors on matrix-vector product between
+mat and vec and the added tensor input respectively.
$$
+ \mbox{out} = \beta\ \mbox{input} + \alpha\ (\mbox{mat} \mathbin{@} \mbox{vec})
+$$
+For inputs of type FloatTensor or DoubleTensor, arguments beta and
alpha must be real numbers, otherwise they should be integers.
if (torch_is_installed()) {

M = torch_randn(c(2))
mat = torch_randn(c(2, 3))
vec = torch_randn(c(3))
torch_addmv(M, mat, vec)
}
Addr
+| input | +(Tensor) matrix to be added |
+
|---|---|
| vec1 | +(Tensor) the first vector of the outer product |
+
| vec2 | +(Tensor) the second vector of the outer product |
+
| beta | +(Number, optional) multiplier for |
+
| alpha | +(Number, optional) multiplier for \(\mbox{vec1} \otimes \mbox{vec2}\) (\(\alpha\)) |
+
| out | +(Tensor, optional) the output tensor. |
+
Performs the outer-product of vectors vec1 and vec2
+and adds it to the matrix input.
Optional values beta and alpha are scaling factors on the
+outer product between vec1 and vec2 and the added matrix
+input respectively.
$$
+ \mbox{out} = \beta\ \mbox{input} + \alpha\ (\mbox{vec1} \otimes \mbox{vec2})
+$$
+If vec1 is a vector of size n and vec2 is a vector
+of size m, then input must be
+broadcastable with a matrix of size
+\((n \times m)\) and out will be a matrix of size
+\((n \times m)\).
For inputs of type FloatTensor or DoubleTensor, arguments beta and
alpha must be real numbers, otherwise they should be integers.
if (torch_is_installed()) {

vec1 = torch_arange(1., 4.)
vec2 = torch_arange(1., 3.)
M = torch_zeros(c(3, 2))
torch_addr(M, vec1, vec2)
}
Allclose
+| input | +(Tensor) first tensor to compare |
+
|---|---|
| other | +(Tensor) second tensor to compare |
+
| atol | +(float, optional) absolute tolerance. Default: 1e-08 |
+
| rtol | +(float, optional) relative tolerance. Default: 1e-05 |
+
| equal_nan | +(bool, optional) if |
+
This function checks if all input and other satisfy the condition:
$$
+ \vert \mbox{input} - \mbox{other} \vert \leq \mbox{atol} + \mbox{rtol} \times \vert \mbox{other} \vert
+$$
elementwise, for all elements of input and other. The behaviour of this function is analogous to
numpy.allclose (https://docs.scipy.org/doc/numpy/reference/generated/numpy.allclose.html).
if (torch_is_installed()) {

torch_allclose(torch_tensor(c(10000., 1e-07)), torch_tensor(c(10000.1, 1e-08)))
torch_allclose(torch_tensor(c(10000., 1e-08)), torch_tensor(c(10000.1, 1e-09)))
torch_allclose(torch_tensor(c(1.0, NaN)), torch_tensor(c(1.0, NaN)))
torch_allclose(torch_tensor(c(1.0, NaN)), torch_tensor(c(1.0, NaN)), equal_nan=TRUE)
}
Angle
+| input | +(Tensor) the input tensor. |
+
|---|---|
| out | +(Tensor, optional) the output tensor. |
+
Computes the element-wise angle (in radians) of the given input tensor.
$$ + \mbox{out}_{i} = angle(\mbox{input}_{i}) +$$
if (torch_is_installed()) {
if (FALSE) {
torch_angle(torch_tensor(c(-1 + 1i, -2 + 2i, 3 - 3i)))*180/3.14159
}

}
Arange
+| start | +(Number) the starting value for the set of points. Default: |
+
|---|---|
| end | +(Number) the ending value for the set of points |
+
| step | +(Number) the gap between each pair of adjacent points. Default: |
+
| out | +(Tensor, optional) the output tensor. |
+
| dtype | +( |
+
| layout | +( |
+
| device | +( |
+
| requires_grad | +(bool, optional) If autograd should record operations on the returned tensor. Default: |
+
Returns a 1-D tensor of size \(\left\lceil \frac{\mbox{end} - \mbox{start}}{\mbox{step}} \right\rceil\)
+with values from the interval [start, end) taken with common difference
+step beginning from start.
Note that non-integer step is subject to floating point rounding errors when
+comparing against end; to avoid inconsistency, we advise adding a small epsilon to end
+in such cases.
$$ + \mbox{out}_{{i+1}} = \mbox{out}_{i} + \mbox{step} +$$
if (torch_is_installed()) {

torch_arange(start = 0, end = 5)
torch_arange(1, 4)
torch_arange(1, 2.5, 0.5)
}
Argmax
+| input | +(Tensor) the input tensor. |
+
|---|---|
| dim | +(int) the dimension to reduce. If |
+
| keepdim | +(bool) whether the output tensor has |
+
Returns the indices of the maximum value of all elements in the input tensor.
This is the second value returned by torch_max. See its
+documentation for the exact semantics of this method.
Returns the indices of the maximum values of a tensor across a dimension.
+This is the second value returned by torch_max. See its
+documentation for the exact semantics of this method.
if (torch_is_installed()) {

if (FALSE) {
a = torch_randn(c(4, 4))
a
torch_argmax(a)
}

a = torch_randn(c(4, 4))
a
torch_argmax(a, dim=1)
}
Argmin
+| input | +(Tensor) the input tensor. |
+
|---|---|
| dim | +(int) the dimension to reduce. If |
+
| keepdim | +(bool) whether the output tensor has |
+
Returns the indices of the minimum value of all elements in the input tensor.
This is the second value returned by torch_min. See its
+documentation for the exact semantics of this method.
Returns the indices of the minimum values of a tensor across a dimension.
+This is the second value returned by torch_min. See its
+documentation for the exact semantics of this method.
if (torch_is_installed()) {

a = torch_randn(c(4, 4))
a
torch_argmin(a)

a = torch_randn(c(4, 4))
a
torch_argmin(a, dim=1)
}
Argsort
+| input | +(Tensor) the input tensor. |
+
|---|---|
| dim | +(int, optional) the dimension to sort along |
+
| descending | +(bool, optional) controls the sorting order (ascending or descending) |
+
Returns the indices that sort a tensor along a given dimension in ascending +order by value.
+This is the second value returned by torch_sort. See its documentation
+for the exact semantics of this method.
+
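For illustration, a minimal sketch (assuming torch is installed and that the function is exposed as torch_argsort(), following the naming pattern of the surrounding entries):

if (torch_is_installed()) {

a = torch_randn(c(4, 4))
a
torch_argsort(a, dim = 2)
}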
As_strided
+| input | +(Tensor) the input tensor. |
+
|---|---|
| size | +(tuple or ints) the shape of the output tensor |
+
| stride | +(tuple or ints) the stride of the output tensor |
+
| storage_offset | +(int, optional) the offset in the underlying storage of the output tensor |
+
Create a view of an existing torch_Tensor input with specified
+size, stride and storage_offset.
More than one element of a created tensor may refer to a single memory +location. As a result, in-place operations (especially ones that are +vectorized) may result in incorrect behavior. If you need to write to +the tensors, please clone them first.
Many PyTorch functions, which return a view of a tensor, are internally +implemented with this function. Those functions, like +`torch_Tensor.expand`, are easier to read and are therefore more +advisable to use. ++ + +
if (torch_is_installed()) {

x = torch_randn(c(3, 3))
x
t = torch_as_strided(x, list(2, 2), list(1, 2))
t
t = torch_as_strided(x, list(2, 2), list(1, 2), 1)
t
}
Asin
+| input | +(Tensor) the input tensor. |
+
|---|---|
| out | +(Tensor, optional) the output tensor. |
+
Returns a new tensor with the arcsine of the elements of input.
$$ + \mbox{out}_{i} = \sin^{-1}(\mbox{input}_{i}) +$$
+ ++
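A minimal sketch, assuming torch is installed and that the function is exposed as torch_asin():

if (torch_is_installed()) {

a = torch_randn(c(4))
a
torch_asin(a)
}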
Atan
+| input | +(Tensor) the input tensor. |
+
|---|---|
| out | +(Tensor, optional) the output tensor. |
+
Returns a new tensor with the arctangent of the elements of input.
$$ + \mbox{out}_{i} = \tan^{-1}(\mbox{input}_{i}) +$$
+ ++
Atan2
+| input | +(Tensor) the first input tensor |
+
|---|---|
| other | +(Tensor) the second input tensor |
+
| out | +(Tensor, optional) the output tensor. |
+
Element-wise arctangent of \(\mbox{input}_{i} / \mbox{other}_{i}\) +with consideration of the quadrant. Returns a new tensor with the signed angles +in radians between vector \((\mbox{other}_{i}, \mbox{input}_{i})\) +and vector \((1, 0)\). (Note that \(\mbox{other}_{i}\), the second +parameter, is the x-coordinate, while \(\mbox{input}_{i}\), the first +parameter, is the y-coordinate.)
+The shapes of input and other must be
+broadcastable .
+
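A minimal sketch, assuming torch is installed and that the function is exposed as torch_atan2() taking the y-coordinates as the first argument:

if (torch_is_installed()) {

y = torch_randn(c(4))
x = torch_randn(c(4))
torch_atan2(y, x)
}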
Avg_pool1d
+| input | +NA input tensor of shape \((\mbox{minibatch} , \mbox{in\_channels} , iW)\) |
+
|---|---|
| kernel_size | +NA the size of the window. Can be a single number or a tuple |
+
| stride | +NA the stride of the window. Can be a single number or a tuple |
+
| padding | +NA implicit zero paddings on both sides of the input. Can be a single number or a tuple |
+
| ceil_mode | +NA when True, will use |
+
| count_include_pad | +NA when True, will include the zero-padding in the averaging calculation. Default: |
+
Applies a 1D average pooling over an input signal composed of several +input planes.
+See ~torch.nn.AvgPool1d for details and output shape.
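A minimal sketch, assuming the functional interface is exposed as nnf_avg_pool1d() (mirroring the nnf_conv1d examples elsewhere in this reference); the kernel and stride values are arbitrary:

if (torch_is_installed()) {

input = torch_randn(c(1, 1, 7))
nnf_avg_pool1d(input, kernel_size = 3, stride = 2)
}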
Baddbmm
+| input | +(Tensor) the tensor to be added |
+
|---|---|
| batch1 | +(Tensor) the first batch of matrices to be multiplied |
+
| batch2 | +(Tensor) the second batch of matrices to be multiplied |
+
| beta | +(Number, optional) multiplier for |
+
| alpha | +(Number, optional) multiplier for \(\mbox{batch1} \mathbin{@} \mbox{batch2}\) (\(\alpha\)) |
+
| out | +(Tensor, optional) the output tensor. |
+
Performs a batch matrix-matrix product of matrices in batch1
+and batch2.
+input is added to the final result.
batch1 and batch2 must be 3-D tensors each containing the same
+number of matrices.
If batch1 is a \((b \times n \times m)\) tensor, batch2 is a
+\((b \times m \times p)\) tensor, then input must be
+broadcastable with a
+\((b \times n \times p)\) tensor and out will be a
+\((b \times n \times p)\) tensor. Both alpha and beta mean the
+same as the scaling factors used in torch_addbmm.
$$
+ \mbox{out}_i = \beta\ \mbox{input}_i + \alpha\ (\mbox{batch1}_i \mathbin{@} \mbox{batch2}_i)
+$$
+For inputs of type FloatTensor or DoubleTensor, arguments beta and
+alpha must be real numbers, otherwise they should be integers.
if (torch_is_installed()) {

M = torch_randn(c(10, 3, 5))
batch1 = torch_randn(c(10, 3, 4))
batch2 = torch_randn(c(10, 4, 5))
torch_baddbmm(M, batch1, batch2)
}
R/gen-namespace-docs.R, R/gen-namespace-examples.R
+ torch_bartlett_window.RdBartlett_window
+| window_length | +(int) the size of returned window |
+
|---|---|
| periodic | +(bool, optional) If True, returns a window to be used as periodic function. If False, return a symmetric window. |
+
| dtype | +( |
+
| layout | +( |
+
| device | +( |
+
| requires_grad | +(bool, optional) If autograd should record operations on the returned tensor. Default: |
+
If `window_length` = 1, the returned window contains a single value 1.
+
+
+ Bartlett window function.
+$$ + w[n] = 1 - \left| \frac{2n}{N-1} - 1 \right| = \left\{ \begin{array}{ll} + \frac{2n}{N - 1} & \mbox{if } 0 \leq n \leq \frac{N - 1}{2} \\ + 2 - \frac{2n}{N - 1} & \mbox{if } \frac{N - 1}{2} < n < N \\ + \end{array} + \right. , +$$ +where \(N\) is the full window size.
+The input window_length is a positive integer controlling the
+returned window size. periodic flag determines whether the returned
+window trims off the last duplicate value from the symmetric window and is
+ready to be used as a periodic window with functions like
+torch_stft. Therefore, if periodic is true, the \(N\) in
+above formula is in fact \(\mbox{window\_length} + 1\). Also, we always have
torch_bartlett_window(L, periodic=True) equal to
torch_bartlett_window(L + 1, periodic=False)[:-1].
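A minimal sketch contrasting the periodic and symmetric variants, assuming torch is installed; the window length is arbitrary:

if (torch_is_installed()) {

torch_bartlett_window(5)
torch_bartlett_window(5, periodic = FALSE)
}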
Bernoulli
+| input | +(Tensor) the input tensor of probability values for the Bernoulli distribution |
+
|---|---|
| generator | +( |
+
| out | +(Tensor, optional) the output tensor. |
+
Draws binary random numbers (0 or 1) from a Bernoulli distribution.
+The input tensor should be a tensor containing probabilities
+to be used for drawing the binary random number.
+Hence, all values in input have to be in the range:
+\(0 \leq \mbox{input}_i \leq 1\).
The \(\mbox{i}^{th}\) element of the output tensor will draw a
+value \(1\) according to the \(\mbox{i}^{th}\) probability value given
+in input.
$$
+ \mbox{out}_{i} \sim \mathrm{Bernoulli}(p = \mbox{input}_{i})
+$$
+The returned out tensor only has values 0 or 1 and is of the same
+shape as input.
out can have integral dtype, but input must have floating
+point dtype.
if (torch_is_installed()) {

a = torch_empty(c(3, 3))$uniform_(0, 1) # generate a uniform random matrix with range c(0, 1)
a
torch_bernoulli(a)
a = torch_ones(c(3, 3)) # probability of drawing "1" is 1
torch_bernoulli(a)
a = torch_zeros(c(3, 3)) # probability of drawing "1" is 0
torch_bernoulli(a)
}
Bincount
+| input | +(Tensor) 1-d int tensor |
+
|---|---|
| weights | +(Tensor) optional, weight for each value in the input tensor. Should be of same size as input tensor. |
+
| minlength | +(int) optional, minimum number of bins. Should be non-negative. |
+
Count the frequency of each value in an array of non-negative ints.
+The number of bins (size 1) is one larger than the largest value in
+input unless input is empty, in which case the result is a
+tensor of size 0. If minlength is specified, the number of bins is at least
+minlength and if input is empty, then the result is tensor of size
+minlength filled with zeros. If n is the value at position i,
+out[n] += weights[i] if weights is specified else
+out[n] += 1.
if (torch_is_installed()) {

input = torch_randint(0, 8, list(5), dtype=torch_int64())
weights = torch_linspace(0, 1, steps=5)
input
weights
torch_bincount(input, weights)
input$bincount(weights)
}
Bitwise_and
+| input | +NA the first input tensor |
+
|---|---|
| other | +NA the second input tensor |
+
| out | +(Tensor, optional) the output tensor. |
+
Computes the bitwise AND of input and other. The input tensor must be of
+integral or Boolean types. For bool tensors, it computes the logical AND.
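A minimal sketch, assuming torch is installed and that the function is exposed as torch_bitwise_and(); the input values are arbitrary:

if (torch_is_installed()) {

torch_bitwise_and(torch_tensor(c(-1L, -2L, 3L)), torch_tensor(c(1L, 0L, 3L)))
torch_bitwise_and(torch_tensor(c(TRUE, TRUE, FALSE)), torch_tensor(c(FALSE, TRUE, FALSE)))
}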
Bitwise_not
+| input | +(Tensor) the input tensor. |
+
|---|---|
| out | +(Tensor, optional) the output tensor. |
+
Computes the bitwise NOT of the given input tensor. The input tensor must be of +integral or Boolean types. For bool tensors, it computes the logical NOT.
+ +Bitwise_or
+| input | +NA the first input tensor |
+
|---|---|
| other | +NA the second input tensor |
+
| out | +(Tensor, optional) the output tensor. |
+
Computes the bitwise OR of input and other. The input tensor must be of
+integral or Boolean types. For bool tensors, it computes the logical OR.
Bitwise_xor
+| input | +NA the first input tensor |
+
|---|---|
| other | +NA the second input tensor |
+
| out | +(Tensor, optional) the output tensor. |
+
Computes the bitwise XOR of input and other. The input tensor must be of
+integral or Boolean types. For bool tensors, it computes the logical XOR.
R/gen-namespace-docs.R, R/gen-namespace-examples.R
+ torch_blackman_window.RdBlackman_window
+| window_length | +(int) the size of returned window |
+
|---|---|
| periodic | +(bool, optional) If True, returns a window to be used as periodic function. If False, return a symmetric window. |
+
| dtype | +( |
+
| layout | +( |
+
| device | +( |
+
| requires_grad | +(bool, optional) If autograd should record operations on the returned tensor. Default: |
+
If `window_length` = 1, the returned window contains a single value 1.
+
+
+ Blackman window function.
+$$ + w[n] = 0.42 - 0.5 \cos \left( \frac{2 \pi n}{N - 1} \right) + 0.08 \cos \left( \frac{4 \pi n}{N - 1} \right) +$$ +where \(N\) is the full window size.
+The input window_length is a positive integer controlling the
+returned window size. periodic flag determines whether the returned
+window trims off the last duplicate value from the symmetric window and is
+ready to be used as a periodic window with functions like
+torch_stft. Therefore, if periodic is true, the \(N\) in
+above formula is in fact \(\mbox{window\_length} + 1\). Also, we always have
torch_blackman_window(L, periodic=True) equal to
torch_blackman_window(L + 1, periodic=False)[:-1].
Bmm
+| input | +(Tensor) the first batch of matrices to be multiplied |
+
|---|---|
| mat2 | +(Tensor) the second batch of matrices to be multiplied |
+
| out | +(Tensor, optional) the output tensor. |
+
This function does not broadcast .
+For broadcasting matrix products, see torch_matmul.
Performs a batch matrix-matrix product of matrices stored in input
+and mat2.
input and mat2 must be 3-D tensors each containing
+the same number of matrices.
If input is a \((b \times n \times m)\) tensor, mat2 is a
+\((b \times m \times p)\) tensor, out will be a
+\((b \times n \times p)\) tensor.
$$ + \mbox{out}_i = \mbox{input}_i \mathbin{@} \mbox{mat2}_i +$$
if (torch_is_installed()) {

input = torch_randn(c(10, 3, 4))
mat2 = torch_randn(c(10, 4, 5))
res = torch_bmm(input, mat2)
res
}
R/gen-namespace-docs.R, R/gen-namespace-examples.R
+ torch_broadcast_tensors.RdBroadcast_tensors
+| *tensors | +NA any number of tensors of the same type |
+
|---|
Broadcasts the given tensors according to broadcasting-semantics.
if (torch_is_installed()) {

x = torch_arange(0, 3)$view(c(1, 3))
y = torch_arange(0, 2)$view(c(2, 1))
out = torch_broadcast_tensors(list(x, y))
out[[1]]
}
Can_cast
+| from | +(dtype) The original |
+
|---|---|
| to | +(dtype) The target |
+
Determines if a type conversion is allowed under PyTorch casting rules +described in the type promotion documentation .
if (torch_is_installed()) {

torch_can_cast(torch_double(), torch_float())
torch_can_cast(torch_float(), torch_int())
}
R/gen-namespace-docs.R, R/gen-namespace-examples.R
+ torch_cartesian_prod.RdCartesian_prod
+| *tensors | +NA any number of 1 dimensional tensors. |
+
|---|
Computes the Cartesian product of the given sequence of tensors. The behavior is similar to
Python's itertools.product.
if (torch_is_installed()) {

a = c(1, 2, 3)
b = c(4, 5)
tensor_a = torch_tensor(a)
tensor_b = torch_tensor(b)
torch_cartesian_prod(list(tensor_a, tensor_b))
}
Cat
+| tensors | +(sequence of Tensors) any python sequence of tensors of the same type. Non-empty tensors provided must have the same shape, except in the cat dimension. |
+
|---|---|
| dim | +(int, optional) the dimension over which the tensors are concatenated |
+
| out | +(Tensor, optional) the output tensor. |
+
Concatenates the given sequence of seq tensors in the given dimension.
+All tensors must either have the same shape (except in the concatenating
+dimension) or be empty.
torch_cat can be seen as an inverse operation for torch_split()
+and torch_chunk.
torch_cat can be best understood via examples.
if (torch_is_installed()) {

x = torch_randn(c(2, 3))
x
torch_cat(list(x, x, x), 1)
torch_cat(list(x, x, x), 2)
}
Cdist
+| x1 | +(Tensor) input tensor of shape \(B \times P \times M\). |
+
|---|---|
| x2 | +(Tensor) input tensor of shape \(B \times R \times M\). |
+
| p | +NA p value for the p-norm distance to calculate between each vector pair \(\in [0, \infty]\). |
+
| compute_mode | +NA 'use_mm_for_euclid_dist_if_necessary' - will use matrix multiplication approach to calculate euclidean distance (p = 2) if P > 25 or R > 25 'use_mm_for_euclid_dist' - will always use matrix multiplication approach to calculate euclidean distance (p = 2) 'donot_use_mm_for_euclid_dist' - will never use matrix multiplication approach to calculate euclidean distance (p = 2) Default: use_mm_for_euclid_dist_if_necessary. |
+
Computes the batched p-norm distance between each pair of the two collections of row vectors.
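A minimal sketch, assuming torch is installed and that the function is exposed as torch_cdist(); the inputs are arbitrary:

if (torch_is_installed()) {

x1 = torch_randn(c(3, 2))
x2 = torch_randn(c(4, 2))
torch_cdist(x1, x2, p = 2)
}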
+ +Ceil
+| input | +(Tensor) the input tensor. |
+
|---|---|
| out | +(Tensor, optional) the output tensor. |
+
Returns a new tensor with the ceil of the elements of input,
+the smallest integer greater than or equal to each element.
$$ + \mbox{out}_{i} = \left\lceil \mbox{input}_{i} \right\rceil = \left\lfloor \mbox{input}_{i} \right\rfloor + 1 +$$
+ ++
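A minimal sketch, assuming torch is installed and that the function is exposed as torch_ceil():

if (torch_is_installed()) {

a = torch_randn(c(4))
a
torch_ceil(a)
}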
R/gen-namespace-docs.R, R/gen-namespace-examples.R
+ torch_chain_matmul.RdChain_matmul
+| matrices | +(Tensors...) a sequence of 2 or more 2-D tensors whose product is to be determined. |
+
|---|
Returns the matrix product of the \(N\) 2-D tensors. This product is efficiently computed
using the matrix chain order algorithm, which selects the order that incurs the lowest cost in terms
of arithmetic operations (see CLRS, Introduction to Algorithms). Note that since this is a function to compute the product, \(N\)
+needs to be greater than or equal to 2; if equal to 2 then a trivial matrix-matrix product is returned.
+If \(N\) is 1, then this is a no-op - the original matrix is returned as is.
if (torch_is_installed()) {

a = torch_randn(c(3, 4))
b = torch_randn(c(4, 5))
c = torch_randn(c(5, 6))
d = torch_randn(c(6, 7))
torch_chain_matmul(list(a, b, c, d))
}
Cholesky
+| input | +(Tensor) the input tensor \(A\) of size \((*, n, n)\) where |
+
|---|---|
| upper | +(bool, optional) flag that indicates whether to return a upper or lower triangular matrix. Default: |
+
| out | +(Tensor, optional) the output matrix |
+
Computes the Cholesky decomposition of a symmetric positive-definite +matrix \(A\) or for batches of symmetric positive-definite matrices.
+If upper is True, the returned matrix U is upper-triangular, and
+the decomposition has the form:
$$
+ A = U^TU
+$$
+If upper is False, the returned matrix L is lower-triangular, and
+the decomposition has the form:
$$
+ A = LL^T
+$$
+If upper is True, and \(A\) is a batch of symmetric positive-definite
+matrices, then the returned tensor will be composed of upper-triangular Cholesky factors
+of each of the individual matrices. Similarly, when upper is False, the returned
+tensor will be composed of lower-triangular Cholesky factors of each of the individual
+matrices.
if (torch_is_installed()) {

a = torch_randn(c(3, 3))
a = torch_mm(a, a$t()) # make symmetric positive-definite
l = torch_cholesky(a)
a
l
torch_mm(l, l$t())
a = torch_randn(c(3, 2, 2))
if (FALSE) {
a = torch_matmul(a, a$transpose(-1, -2)) + 1e-03 # make symmetric positive-definite
l = torch_cholesky(a)
z = torch_matmul(l, l$transpose(-1, -2))
torch_max(torch_abs(z - a)) # Max non-zero
}
}
R/gen-namespace-docs.R, R/gen-namespace-examples.R
+ torch_cholesky_inverse.RdCholesky_inverse
+| input | +(Tensor) the input 2-D tensor \(u\), a upper or lower triangular Cholesky factor |
+
|---|---|
| upper | +(bool, optional) whether to return a lower (default) or upper triangular matrix |
+
| out | +(Tensor, optional) the output tensor for |
+
Computes the inverse of a symmetric positive-definite matrix \(A\) using its
+Cholesky factor \(u\): returns matrix inv. The inverse is computed using
+LAPACK routines dpotri and spotri (and the corresponding MAGMA routines).
If upper is False, \(u\) is lower triangular
+such that the returned tensor is
$$
+ inv = (uu^{{T}})^{{-1}}
+$$
+If upper is True or not provided, \(u\) is upper
+triangular such that the returned tensor is
$$ + inv = (u^T u)^{{-1}} +$$
if (torch_is_installed()) {

if (FALSE) {
a = torch_randn(c(3, 3))
a = torch_mm(a, a$t()) + 1e-05 * torch_eye(3) # make symmetric positive definite
u = torch_cholesky(a)
a
torch_cholesky_inverse(u)
a$inverse()
}
}
R/gen-namespace-docs.R, R/gen-namespace-examples.R
+ torch_cholesky_solve.RdCholesky_solve
+| input | +(Tensor) input matrix \(b\) of size \((*, m, k)\), where \(*\) is zero or more batch dimensions |
+
|---|---|
| input2 | +(Tensor) input matrix \(u\) of size \((*, m, m)\), where \(*\) is zero of more batch dimensions composed of upper or lower triangular Cholesky factor |
+
| upper | +(bool, optional) whether to consider the Cholesky factor as a lower or upper triangular matrix. Default: |
+
| out | +(Tensor, optional) the output tensor for |
+
Solves a linear system of equations with a positive semidefinite +matrix to be inverted given its Cholesky factor matrix \(u\).
If upper is False, \(u\) is lower triangular and c is
+returned such that:
$$
+ c = (u u^T)^{{-1}} b
+$$
+If upper is True or not provided, \(u\) is upper triangular
+and c is returned such that:
$$
+ c = (u^T u)^{{-1}} b
+$$
torch_cholesky_solve(b, u) can take in 2D inputs b, u or inputs that are
batches of 2D matrices. If the inputs are batches, then batched
outputs c are returned.
if (torch_is_installed()) {

a = torch_randn(c(3, 3))
a = torch_mm(a, a$t()) # make symmetric positive definite
u = torch_cholesky(a)
a
b = torch_randn(c(3, 2))
b
torch_cholesky_solve(b, u)
torch_mm(a$inverse(), b)
}
Chunk
+| input | +(Tensor) the tensor to split |
+
|---|---|
| chunks | +(int) number of chunks to return |
+
| dim | +(int) dimension along which to split the tensor |
+
Splits a tensor into a specific number of chunks. Each chunk is a view of +the input tensor.
+Last chunk will be smaller if the tensor size along the given dimension
+dim is not divisible by chunks.
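A minimal sketch, assuming torch is installed and that the function is exposed as torch_chunk() with a 1-based dim argument:

if (torch_is_installed()) {

x = torch_randn(c(5, 2))
torch_chunk(x, chunks = 2, dim = 1)
}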
Clamp
+| input | +(Tensor) the input tensor. |
+
|---|---|
| min | +(Number) lower-bound of the range to be clamped to |
+
| max | +(Number) upper-bound of the range to be clamped to |
+
| out | +(Tensor, optional) the output tensor. |
+
| value | +(Number) minimal value of each element in the output |
+
Clamp all elements in input into the range [ min, max ] and return
+a resulting tensor:
$$
+ y_i = \left\{ \begin{array}{ll}
+ \mbox{min} & \mbox{if } x_i < \mbox{min} \\
+ x_i & \mbox{if } \mbox{min} \leq x_i \leq \mbox{max} \\
+ \mbox{max} & \mbox{if } x_i > \mbox{max}
+ \end{array}
+ \right.
+$$
+If input is of type FloatTensor or DoubleTensor, args min
+and max must be real numbers, otherwise they should be integers.
Clamps all elements in input to be larger or equal min.
If input is of type FloatTensor or DoubleTensor, value
+should be a real number, otherwise it should be an integer.
Clamps all elements in input to be smaller or equal max.
If input is of type FloatTensor or DoubleTensor, value
+should be a real number, otherwise it should be an integer.
if (torch_is_installed()) {

a = torch_randn(c(4))
a
torch_clamp(a, min=-0.5, max=0.5)

a = torch_randn(c(4))
a
torch_clamp(a, min=0.5)

a = torch_randn(c(4))
a
torch_clamp(a, max=0.5)
}
R/gen-namespace-docs.R, R/gen-namespace-examples.R
+ torch_combinations.RdCombinations
+| input | +(Tensor) 1D vector. |
+
|---|---|
| r | +(int, optional) number of elements to combine |
+
| with_replacement | +(boolean, optional) whether to allow duplication in combination |
+
Compute combinations of length \(r\) of the given tensor. The behavior is similar to
+python's itertools.combinations when with_replacement is set to False, and
+itertools.combinations_with_replacement when with_replacement is set to True.
if (torch_is_installed()) {

a = c(1, 2, 3)
tensor_a = torch_tensor(a)
torch_combinations(tensor_a)
torch_combinations(tensor_a, r=3)
torch_combinations(tensor_a, with_replacement=TRUE)
}
Conj
+| input | +(Tensor) the input tensor. |
+
|---|---|
| out | +(Tensor, optional) the output tensor. |
+
Computes the element-wise conjugate of the given input tensor.
$$ + \mbox{out}_{i} = conj(\mbox{input}_{i}) +$$
if (torch_is_installed()) {
if (FALSE) {
torch_conj(torch_tensor(c(-1 + 1i, -2 + 2i, 3 - 3i)))
}
}
Conv1d
+| input | +NA input tensor of shape \((\mbox{minibatch} , \mbox{in\_channels} , iW)\) |
+
|---|---|
| weight | +NA filters of shape \((\mbox{out\_channels} , \frac{\mbox{in\_channels}}{\mbox{groups}} , kW)\) |
+
| bias | +NA optional bias of shape \((\mbox{out\_channels})\). Default: |
+
| stride | +NA the stride of the convolving kernel. Can be a single number or a one-element tuple |
+
| padding | +NA implicit paddings on both sides of the input. Can be a single number or a one-element tuple |
+
| dilation | +NA the spacing between kernel elements. Can be a single number or a one-element tuple |
+
| groups | +NA split input into groups, \(\mbox{in\_channels}\) should be divisible by the number of groups. Default: 1 |
+
Applies a 1D convolution over an input signal composed of several input +planes.
+See ~torch.nn.Conv1d for details and output shape.
if (torch_is_installed()) {

filters = torch_randn(c(33, 16, 3))
inputs = torch_randn(c(20, 16, 50))
nnf_conv1d(inputs, filters)
}
Conv2d
+| input | +NA input tensor of shape \((\mbox{minibatch} , \mbox{in\_channels} , iH , iW)\) |
+
|---|---|
| weight | +NA filters of shape \((\mbox{out\_channels} , \frac{\mbox{in\_channels}}{\mbox{groups}} , kH , kW)\) |
+
| bias | +NA optional bias tensor of shape \((\mbox{out\_channels})\). Default: |
+
| stride | +NA the stride of the convolving kernel. Can be a single number or a tuple |
+
| padding | +NA implicit paddings on both sides of the input. Can be a single number or a tuple |
+
| dilation | +NA the spacing between kernel elements. Can be a single number or a tuple |
+
| groups | +NA split input into groups, \(\mbox{in\_channels}\) should be divisible by the number of groups. Default: 1 |
+
Applies a 2D convolution over an input image composed of several input +planes.
+See ~torch.nn.Conv2d for details and output shape.
if (torch_is_installed()) {

# With square kernels and equal stride
filters = torch_randn(c(8,4,3,3))
inputs = torch_randn(c(1,4,5,5))
nnf_conv2d(inputs, filters, padding=1)
}
Conv3d
+| input | +NA input tensor of shape \((\mbox{minibatch} , \mbox{in\_channels} , iT , iH , iW)\) |
+
|---|---|
| weight | +NA filters of shape \((\mbox{out\_channels} , \frac{\mbox{in\_channels}}{\mbox{groups}} , kT , kH , kW)\) |
+
| bias | +NA optional bias tensor of shape \((\mbox{out\_channels})\). Default: None |
+
| stride | +NA the stride of the convolving kernel. Can be a single number or a tuple |
+
| padding | +NA implicit paddings on both sides of the input. Can be a single number or a tuple |
+
| dilation | +NA the spacing between kernel elements. Can be a single number or a tuple |
+
| groups | +NA split input into groups, \(\mbox{in\_channels}\) should be divisible by the number of groups. Default: 1 |
+
Applies a 3D convolution over an input image composed of several input +planes.
+See ~torch.nn.Conv3d for details and output shape.
if (torch_is_installed()) {

# filters = torch_randn(c(33, 16, 3, 3, 3))
# inputs = torch_randn(c(20, 16, 50, 10, 20))
# nnf_conv3d(inputs, filters)
}
Conv_tbc
+| input | +NA input tensor of shape \((\mbox{sequence length} \times batch \times \mbox{in\_channels})\) |
+
|---|---|
| weight | +NA filter of shape (\(\mbox{kernel width} \times \mbox{in\_channels} \times \mbox{out\_channels}\)) |
+
| bias | +NA bias of shape (\(\mbox{out\_channels}\)) |
+
| pad | +NA number of timesteps to pad. Default: 0 |
+
Applies a 1-dimensional sequence convolution over an input sequence. +Input and output dimensions are (Time, Batch, Channels) - hence TBC.
+ +R/gen-namespace-docs.R, R/gen-namespace-examples.R
+ torch_conv_transpose1d.RdConv_transpose1d
+| input | +NA input tensor of shape \((\mbox{minibatch} , \mbox{in\_channels} , iW)\) |
+
|---|---|
| weight | +NA filters of shape \((\mbox{in\_channels} , \frac{\mbox{out\_channels}}{\mbox{groups}} , kW)\) |
+
| bias | +NA optional bias of shape \((\mbox{out\_channels})\). Default: None |
+
| stride | +NA the stride of the convolving kernel. Can be a single number or a tuple |
+
| padding | +NA |
+
| output_padding | +NA additional size added to one side of each dimension in the output shape. Can be a single number or a tuple |
+
| groups | +NA split input into groups, \(\mbox{in\_channels}\) should be divisible by the number of groups. Default: 1 |
+
| dilation | +NA the spacing between kernel elements. Can be a single number or a tuple |
+
Applies a 1D transposed convolution operator over an input signal +composed of several input planes, sometimes also called "deconvolution".
+See ~torch.nn.ConvTranspose1d for details and output shape.
if (torch_is_installed()) {

inputs = torch_randn(c(20, 16, 50))
weights = torch_randn(c(16, 33, 5))
nnf_conv_transpose1d(inputs, weights)
}
R/gen-namespace-docs.R, R/gen-namespace-examples.R
+ torch_conv_transpose2d.RdConv_transpose2d
+| input | +NA input tensor of shape \((\mbox{minibatch} , \mbox{in\_channels} , iH , iW)\) |
+
|---|---|
| weight | +NA filters of shape \((\mbox{in\_channels} , \frac{\mbox{out\_channels}}{\mbox{groups}} , kH , kW)\) |
+
| bias | +NA optional bias of shape \((\mbox{out\_channels})\). Default: None |
+
| stride | +NA the stride of the convolving kernel. Can be a single number or a tuple |
+
| padding | +NA |
+
| output_padding | +NA additional size added to one side of each dimension in the output shape. Can be a single number or a tuple |
+
| groups | +NA split input into groups, \(\mbox{in\_channels}\) should be divisible by the number of groups. Default: 1 |
+
| dilation | +NA the spacing between kernel elements. Can be a single number or a tuple |
+
Applies a 2D transposed convolution operator over an input image +composed of several input planes, sometimes also called "deconvolution".
+See ~torch.nn.ConvTranspose2d for details and output shape.
if (torch_is_installed()) {

# With square kernels and equal stride
inputs = torch_randn(c(1, 4, 5, 5))
weights = torch_randn(c(4, 8, 3, 3))
nnf_conv_transpose2d(inputs, weights, padding=1)
}
R/gen-namespace-docs.R, R/gen-namespace-examples.R
+ torch_conv_transpose3d.RdConv_transpose3d
+| input | +NA input tensor of shape \((\mbox{minibatch} , \mbox{in\_channels} , iT , iH , iW)\) |
+
|---|---|
| weight | +NA filters of shape \((\mbox{in\_channels} , \frac{\mbox{out\_channels}}{\mbox{groups}} , kT , kH , kW)\) |
+
| bias | +NA optional bias of shape \((\mbox{out\_channels})\). Default: None |
+
| stride | +NA the stride of the convolving kernel. Can be a single number or a tuple |
+
| padding | +NA |
+
| output_padding | +NA additional size added to one side of each dimension in the output shape. Can be a single number or a tuple |
+
| groups | +NA split input into groups, \(\mbox{in\_channels}\) should be divisible by the number of groups. Default: 1 |
+
| dilation | +NA the spacing between kernel elements. Can be a single number or a tuple |
+
Applies a 3D transposed convolution operator over an input image +composed of several input planes, sometimes also called "deconvolution"
+See ~torch.nn.ConvTranspose3d for details and output shape.
if (torch_is_installed()) {
if (FALSE) {
inputs = torch_randn(c(20, 16, 50, 10, 20))
weights = torch_randn(c(16, 33, 3, 3, 3))
nnf_conv_transpose3d(inputs, weights)
}
}
Cos
+| input | +(Tensor) the input tensor. |
+
|---|---|
| out | +(Tensor, optional) the output tensor. |
+
Returns a new tensor with the cosine of the elements of input.
$$ + \mbox{out}_{i} = \cos(\mbox{input}_{i}) +$$
+ ++
Cosh
+| input | +(Tensor) the input tensor. |
+
|---|---|
| out | +(Tensor, optional) the output tensor. |
+
Returns a new tensor with the hyperbolic cosine of the elements of
+input.
$$ + \mbox{out}_{i} = \cosh(\mbox{input}_{i}) +$$
+ ++
R/gen-namespace-docs.R, R/gen-namespace-examples.R
+ torch_cosine_similarity.RdCosine_similarity
+| x1 | +(Tensor) First input. |
+
|---|---|
| x2 | +(Tensor) Second input (of size matching x1). |
+
| dim | +(int, optional) Dimension of vectors. Default: 1 |
+
| eps | +(float, optional) Small value to avoid division by zero. Default: 1e-8 |
+
Returns cosine similarity between x1 and x2, computed along dim.
+$$ + \mbox{similarity} = \frac{x_1 \cdot x_2}{\max(\Vert x_1 \Vert _2 \cdot \Vert x_2 \Vert _2, \epsilon)} +$$
if (torch_is_installed()) {

input1 = torch_randn(c(100, 128))
input2 = torch_randn(c(100, 128))
output = torch_cosine_similarity(input1, input2)
output
}
Cross
+| input | +(Tensor) the input tensor. |
+
|---|---|
| other | +(Tensor) the second input tensor |
+
| dim | +(int, optional) the dimension to take the cross-product in. |
+
| out | +(Tensor, optional) the output tensor. |
+
Returns the cross product of vectors in dimension dim of input
+and other.
input and other must have the same size, and the size of their
+dim dimension should be 3.
If dim is not given, it defaults to the first dimension found with the
+size 3.
if (torch_is_installed()) {

a = torch_randn(c(4, 3))
a
b = torch_randn(c(4, 3))
b
torch_cross(a, b, dim=2)
torch_cross(a, b)
}
Cummax
+| input | +(Tensor) the input tensor. |
+
|---|---|
| dim | +(int) the dimension to do the operation over |
+
| out | +(tuple, optional) the result tuple of two output tensors (values, indices) |
+
Returns a namedtuple (values, indices) where values is the cumulative maximum of
elements of input in the dimension dim, and indices is the index
location of each maximum value found in the dimension dim.
$$ + y_i = max(x_1, x_2, x_3, \dots, x_i) +$$
+ ++
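A minimal sketch, assuming torch is installed and that the function is exposed as torch_cummax() with a 1-based dim argument:

if (torch_is_installed()) {

a = torch_randn(c(10))
a
torch_cummax(a, dim = 1)
}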
Cummin
+| input | +(Tensor) the input tensor. |
+
|---|---|
| dim | +(int) the dimension to do the operation over |
+
| out | +(tuple, optional) the result tuple of two output tensors (values, indices) |
+
Returns a namedtuple (values, indices) where values is the cumulative minimum of
elements of input in the dimension dim, and indices is the index
location of each minimum value found in the dimension dim.
$$ + y_i = min(x_1, x_2, x_3, \dots, x_i) +$$
+ ++
Cumprod
+| input | +(Tensor) the input tensor. |
+
|---|---|
| dim | +(int) the dimension to do the operation over |
+
| dtype | +( |
+
| out | +(Tensor, optional) the output tensor. |
+
Returns the cumulative product of elements of input in the dimension
+dim.
For example, if input is a vector of size N, the result will also be
+a vector of size N, with elements.
$$ + y_i = x_1 \times x_2\times x_3\times \dots \times x_i +$$
+ ++
Cumsum
+| input | +(Tensor) the input tensor. |
+
|---|---|
| dim | +(int) the dimension to do the operation over |
+
| dtype | +( |
+
| out | +(Tensor, optional) the output tensor. |
+
Returns the cumulative sum of elements of input in the dimension
+dim.
For example, if input is a vector of size N, the result will also be
+a vector of size N, with elements.
$$ + y_i = x_1 + x_2 + x_3 + \dots + x_i +$$
+ ++
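A minimal sketch, assuming torch is installed and that the function is exposed as torch_cumsum() with a 1-based dim argument:

if (torch_is_installed()) {

a = torch_randn(c(10))
a
torch_cumsum(a, dim = 1)
}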
Det
+| input | +(Tensor) the input tensor of size |
+
|---|
Backward through `det` internally uses SVD results when `input` is
not invertible. In this case, double backward through `det` will be
unstable when `input` doesn't have distinct singular values. See
`~torch.svd` for details.
Calculates determinant of a square matrix or batches of square matrices.
if (torch_is_installed()) {

A = torch_randn(c(3, 3))
torch_det(A)
A = torch_randn(c(3, 2, 2))
A
A$det()
}
A torch_device is an object representing the device on which a torch_tensor
+is or will be allocated.
torch_device(type, index = NULL)
| type | +(character) a device type |
+
|---|---|
| index | +(integer) optional device ordinal for the device type. If the device ordinal
+is not present, this object will always represent the current device for the device
+type, even after A |
+
if (torch_is_installed()) {

# Via string
torch_device("cuda:1")
torch_device("cpu")
torch_device("cuda") # current cuda device

# Via string and device ordinal
torch_device("cuda", 0)
torch_device("cpu", 0)

}
Diag
+| input | +(Tensor) the input tensor. |
+
|---|---|
| diagonal | +(int, optional) the diagonal to consider |
+
| out | +(Tensor, optional) the output tensor. |
+
If input is a vector (1-D tensor), then returns a 2-D square tensor
+with the elements of input as the diagonal.
If input is a matrix (2-D tensor), then returns a 1-D tensor with
+the diagonal elements of input.
The argument diagonal controls which diagonal to consider:
If diagonal = 0, it is the main diagonal.
If diagonal > 0, it is above the main diagonal.
If diagonal < 0, it is below the main diagonal.
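A minimal sketch, assuming torch is installed and that the function is exposed as torch_diag():

if (torch_is_installed()) {

a = torch_randn(c(3))
torch_diag(a)   # 3 x 3 matrix with a on the main diagonal
m = torch_randn(c(3, 3))
torch_diag(m)   # 1-D tensor holding the diagonal of m
}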
Diag_embed
+| input | +(Tensor) the input tensor. Must be at least 1-dimensional. |
+
|---|---|
| offset | +(int, optional) which diagonal to consider. Default: 0 (main diagonal). |
+
| dim1 | +(int, optional) first dimension with respect to which to take diagonal. Default: -2. |
+
| dim2 | +(int, optional) second dimension with respect to which to take diagonal. Default: -1. |
+
Creates a tensor whose diagonals of certain 2D planes (specified by
+dim1 and dim2) are filled by input.
+To facilitate creating batched diagonal matrices, the 2D planes formed by
+the last two dimensions of the returned tensor are chosen by default.
The argument offset controls which diagonal to consider:
If offset = 0, it is the main diagonal.
If offset > 0, it is above the main diagonal.
If offset < 0, it is below the main diagonal.
The size of the new matrix will be calculated to make the specified diagonal
+of the size of the last input dimension.
+Note that for offset other than \(0\), the order of dim1
+and dim2 matters. Exchanging them is equivalent to changing the
+sign of offset.
Applying torch_diagonal to the output of this function with
+the same arguments yields a matrix identical to input. However,
+torch_diagonal has different default dimensions, so those
+need to be explicitly specified.
if (torch_is_installed()) {

a = torch_randn(c(2, 3))
torch_diag_embed(a)
torch_diag_embed(a, offset=1, dim1=1, dim2=3)
}
Diagflat
+| input | +(Tensor) the input tensor. |
+
|---|---|
| offset | +(int, optional) the diagonal to consider. Default: 0 (main diagonal). |
+
If input is a vector (1-D tensor), then returns a 2-D square tensor
+with the elements of input as the diagonal.
If input is a tensor with more than one dimension, then returns a
+2-D tensor with diagonal elements equal to a flattened input.
The argument offset controls which diagonal to consider:
If offset = 0, it is the main diagonal.
If offset > 0, it is above the main diagonal.
If offset < 0, it is below the main diagonal.
if (torch_is_installed()) {

a = torch_randn(c(3))
a
torch_diagflat(a)
torch_diagflat(a, 1)
a = torch_randn(c(2, 2))
a
torch_diagflat(a)
}
Diagonal
+| input | +(Tensor) the input tensor. Must be at least 2-dimensional. |
+
|---|---|
| offset | +(int, optional) which diagonal to consider. Default: 0 (main diagonal). |
+
| dim1 | +(int, optional) first dimension with respect to which to take diagonal. Default: 0. |
+
| dim2 | +(int, optional) second dimension with respect to which to take diagonal. Default: 1. |
+
Returns a partial view of input with its diagonal elements
+with respect to dim1 and dim2 appended as a dimension
+at the end of the shape.
The argument offset controls which diagonal to consider:
If offset = 0, it is the main diagonal.
If offset > 0, it is above the main diagonal.
If offset < 0, it is below the main diagonal.
Applying torch_diag_embed to the output of this function with
+the same arguments yields a diagonal matrix with the diagonal entries
+of the input. However, torch_diag_embed has different default
+dimensions, so those need to be explicitly specified.
if (torch_is_installed()) {

a = torch_randn(c(3, 3))
a
torch_diagonal(a, offset = 0)
torch_diagonal(a, offset = 1)
x = torch_randn(c(2, 5, 4, 2))
torch_diagonal(x, offset=-1, dim1=1, dim2=2)
}
Digamma
+| input | +(Tensor) the tensor to compute the digamma function on |
+
|---|
Computes the logarithmic derivative of the gamma function on input.
$$ + \psi(x) = \frac{d}{dx} \ln\left(\Gamma\left(x\right)\right) = \frac{\Gamma'(x)}{\Gamma(x)} +$$
+ ++
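A minimal sketch, assuming torch is installed and that the function is exposed as torch_digamma(); the input values are arbitrary:

if (torch_is_installed()) {

a = torch_tensor(c(1, 0.5))
torch_digamma(a)
}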
Dist
+| input | +(Tensor) the input tensor. |
+
|---|---|
| other | +(Tensor) the Right-hand-side input tensor |
+
| p | +(float, optional) the norm to be computed |
+
Returns the p-norm of (input - other)
The shapes of input and other must be
+broadcastable .
if (torch_is_installed()) {

x = torch_randn(c(4))
x
y = torch_randn(c(4))
y
torch_dist(x, y, 3.5)
torch_dist(x, y, 3)
torch_dist(x, y, 0)
torch_dist(x, y, 1)
}
Div
+| input | +(Tensor) the input tensor. |
+
|---|---|
| other | +(Number) the number to be divided to each element of |
+
Divides each element of the input input with the scalar other and
+returns a new resulting tensor.
Each element of the tensor input is divided by each element of the tensor
+other. The resulting tensor is returned.
$$
+ \mbox{out}_i = \frac{\mbox{input}_i}{\mbox{other}_i}
+$$
+The shapes of input and other must be broadcastable
+. If the torch_dtype of input and
+other differ, the torch_dtype of the result tensor is determined
+following rules described in the type promotion documentation
+. If out is specified, the result must be
+castable to the torch_dtype of the
+specified output tensor. Integral division by zero leads to undefined behavior.
Integer division using div is deprecated, and in a future release div will
+perform true division like torch_true_divide.
+Use torch_floor_divide (// in Python) to perform integer division,
+instead.
$$
+ \mbox{out}_i = \frac{\mbox{input}_i}{\mbox{other}}
+$$
+If the torch_dtype of input and other differ, the
+torch_dtype of the result tensor is determined following rules
+described in the type promotion documentation . If
+out is specified, the result must be castable
+to the torch_dtype of the specified output tensor. Integral division
+by zero leads to undefined behavior.
if (torch_is_installed()) {

a = torch_randn(c(5))
a
torch_div(a, 0.5)

a = torch_randn(c(4, 4))
a
b = torch_randn(c(4))
b
torch_div(a, b)
}
Returns the correspondent data type.
torch_float32()
torch_float()
torch_float64()
torch_double()
torch_float16()
torch_half()
torch_uint8()
torch_int8()
torch_int16()
torch_short()
torch_int32()
torch_int()
torch_int64()
torch_long()
torch_bool()
torch_quint8()
torch_qint8()
torch_qint32()
Eig
+| input | +(Tensor) the square matrix of shape \((n \times n)\) for which the eigenvalues and eigenvectors will be computed |
+
|---|---|
| eigenvectors | +(bool) |
+
| out | +(tuple, optional) the output tensors |
+
Since eigenvalues and eigenvectors might be complex, the backward pass is supported only
for torch_symeig.
Computes the eigenvalues and eigenvectors of a real square matrix.
+ +Einsum
+| equation | +(string) The equation is given in terms of lower case letters (indices) to be associated with each dimension of the operands and result. The left hand side lists the operands dimensions, separated by commas. There should be one index letter per tensor dimension. The right hand side follows after |
+
|---|---|
| operands | +(Tensor) The operands to compute the Einstein sum of. |
+
This function provides a way of computing multilinear expressions (i.e. sums of products) using the +Einstein summation convention.
if (torch_is_installed()) {

if (FALSE) {

x = torch_randn(c(5))
y = torch_randn(c(4))
torch_einsum('i,j->ij', list(x, y)) # outer product
A = torch_randn(c(3,5,4))
l = torch_randn(c(2,5))
r = torch_randn(c(2,4))
torch_einsum('bn,anm,bm->ba', list(l, A, r)) # compare torch_nn$functional$bilinear
As = torch_randn(c(3,2,5))
Bs = torch_randn(c(3,5,4))
torch_einsum('bij,bjk->bik', list(As, Bs)) # batch matrix multiplication
A = torch_randn(c(3, 3))
torch_einsum('ii->i', list(A)) # diagonal
A = torch_randn(c(4, 3, 3))
torch_einsum('...ii->...i', list(A)) # batch diagonal
A = torch_randn(c(2, 3, 4, 5))
torch_einsum('...ij->...ji', list(A))$shape # batch permute

}
}
Empty
+| size | +(int...) a sequence of integers defining the shape of the output tensor. Can be a variable number of arguments or a collection like a list or tuple. |
+
|---|---|
| out | +(Tensor, optional) the output tensor. |
+
| dtype | +( |
+
| layout | +( |
+
| device | +( |
+
| requires_grad | +(bool, optional) If autograd should record operations on the returned tensor. Default: |
+
| pin_memory | +(bool, optional) If set, returned tensor would be allocated in the pinned memory. Works only for CPU tensors. Default: |
+
| memory_format | +( |
+
Returns a tensor filled with uninitialized data. The shape of the tensor is
+defined by the variable argument size.
+
Empty_like
+| input | +(Tensor) the size of |
+
|---|---|
| dtype | +( |
+
| layout | +( |
+
| device | +( |
+
| requires_grad | +(bool, optional) If autograd should record operations on the returned tensor. Default: |
+
| memory_format | +( |
+
Returns an uninitialized tensor with the same size as input.
+torch_empty_like(input) is equivalent to
+torch_empty(input.size(), dtype=input.dtype, layout=input.layout, device=input.device).
+
R/gen-namespace-docs.R, R/gen-namespace-examples.R
+ torch_empty_strided.RdEmpty_strided
+| size | +(tuple of ints) the shape of the output tensor |
+
|---|---|
| stride | +(tuple of ints) the strides of the output tensor |
+
| dtype | +( |
+
| layout | +( |
+
| device | +( |
+
| requires_grad | +(bool, optional) If autograd should record operations on the returned tensor. Default: |
+
| pin_memory | +(bool, optional) If set, returned tensor would be allocated in the pinned memory. Works only for CPU tensors. Default: |
+
Returns a tensor filled with uninitialized data. The shape and strides of the tensor is
+defined by the variable argument size and stride respectively.
+torch_empty_strided(size, stride) is equivalent to
+torch_empty(size).as_strided(size, stride).
More than one element of the created tensor may refer to a single memory +location. As a result, in-place operations (especially ones that are +vectorized) may result in incorrect behavior. If you need to write to +the tensors, please clone them first.
if (torch_is_installed()) {

a = torch_empty_strided(list(2, 3), list(1, 2))
a
a$stride(1)
a$size(1)
}
Eq
+| input | +(Tensor) the tensor to compare |
+
|---|---|
| other | +(Tensor or float) the tensor or value to compare |
+
| out | +(Tensor, optional) the output tensor. Must be a |
+
Computes element-wise equality
+The second argument can be a number or a tensor whose shape is +broadcastable with the first argument.
+ ++
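A minimal sketch mirroring the Ge example later in this reference, assuming torch is installed and that the function is exposed as torch_eq():

if (torch_is_installed()) {

torch_eq(torch_tensor(matrix(1:4, ncol = 2, byrow = TRUE)),
         torch_tensor(matrix(c(1, 1, 4, 4), ncol = 2, byrow = TRUE)))
}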
Erf
+| input | +(Tensor) the input tensor. |
+
|---|---|
| out | +(Tensor, optional) the output tensor. |
+
Computes the error function of each element. The error function is defined as follows:
+$$ + \mathrm{erf}(x) = \frac{2}{\sqrt{\pi}} \int_{0}^{x} e^{-t^2} dt +$$
+ ++
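A minimal sketch, assuming torch is installed and that the function is exposed as torch_erf(); the input values are arbitrary:

if (torch_is_installed()) {

torch_erf(torch_tensor(c(0, -1, 10)))
}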
Erfc
+| input | +(Tensor) the input tensor. |
+
|---|---|
| out | +(Tensor, optional) the output tensor. |
+
Computes the complementary error function of each element of input.
+The complementary error function is defined as follows:
$$ + \mathrm{erfc}(x) = 1 - \frac{2}{\sqrt{\pi}} \int_{0}^{x} e^{-t^2} dt +$$
+ ++
Erfinv
+| input | +(Tensor) the input tensor. |
+
|---|---|
| out | +(Tensor, optional) the output tensor. |
+
Computes the inverse error function of each element of input.
+The inverse error function is defined in the range \((-1, 1)\) as:
$$ + \mathrm{erfinv}(\mathrm{erf}(x)) = x +$$
+ ++
Exp
+| input | +(Tensor) the input tensor. |
+
|---|---|
| out | +(Tensor, optional) the output tensor. |
+
Returns a new tensor with the exponential of the elements
+of the input tensor input.
$$ + y_{i} = e^{x_{i}} +$$
+ ++
Expm1
+| input | +(Tensor) the input tensor. |
+
|---|---|
| out | +(Tensor, optional) the output tensor. |
+
Returns a new tensor with the exponential of the elements minus 1
+of input.
$$ + y_{i} = e^{x_{i}} - 1 +$$
+ ++
Eye
+| n | +(int) the number of rows |
+
|---|---|
| m | +(int, optional) the number of columns with default being |
+
| out | +(Tensor, optional) the output tensor. |
+
| dtype | +( |
+
| layout | +( |
+
| device | +( |
+
| requires_grad | +(bool, optional) If autograd should record operations on the returned tensor. Default: |
+
Returns a 2-D tensor with ones on the diagonal and zeros elsewhere.
+ ++
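A minimal sketch, assuming torch is installed and that the function is exposed as torch_eye():

if (torch_is_installed()) {

torch_eye(3)
torch_eye(3, 4)
}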
Fft
+| input | +(Tensor) the input tensor of at least |
+
|---|---|
| signal_ndim | +(int) the number of dimensions in each signal. |
+
| normalized | +(bool, optional) controls whether to return normalized results. Default: |
+
For CUDA tensors, an LRU cache is used for cuFFT plans to speed up +repeatedly running FFT methods on tensors of same geometry with same +configuration. See cufft-plan-cache for more details on how to +monitor and control the cache. ++ +
Complex-to-complex Discrete Fourier Transform
+This method computes the complex-to-complex discrete Fourier transform. +Ignoring the batch dimensions, it computes the following expression:
+$$
+ X[\omega_1, \dots, \omega_d] =
+ \sum_{n_1=0}^{N_1-1} \dots \sum_{n_d=0}^{N_d-1} x[n_1, \dots, n_d]
+ e^{-j\ 2 \pi \sum_{i=0}^d \frac{\omega_i n_i}{N_i}},
+$$
+where \(d\) = signal_ndim is number of dimensions for the
+signal, and \(N_i\) is the size of signal dimension \(i\).
This method supports 1D, 2D and 3D complex-to-complex transforms, indicated
+by signal_ndim. input must be a tensor with last dimension
+of size 2, representing the real and imaginary components of complex
+numbers, and should have at least signal_ndim + 1 dimensions with optionally
+arbitrary number of leading batch dimensions. If normalized is set to
+True, this normalizes the result by dividing it with
+\(\sqrt{\prod_{i=1}^K N_i}\) so that the operator is unitary.
Returns the real and the imaginary parts together as one tensor of the same
+shape of input.
The inverse of this function is torch_ifft.
For CPU tensors, this method is currently only available with MKL. Use
+torch_backends.mkl.is_available to check if MKL is installed.
if (torch_is_installed()) {

# unbatched 2D FFT
x = torch_randn(c(4, 3, 2))
torch_fft(x, 2)
# batched 1D FFT
torch_fft(x, 1)
# arbitrary number of batch dimensions, 2D FFT
x = torch_randn(c(3, 3, 5, 5, 2))
torch_fft(x, 2)

}
Flatten
+| input | +(Tensor) the input tensor. |
+
|---|---|
| start_dim | +(int) the first dim to flatten |
+
| end_dim | +(int) the last dim to flatten |
+
Flattens a contiguous range of dims in a tensor.
if (torch_is_installed()) {

t = torch_tensor(matrix(c(1, 2), ncol = 2))
torch_flatten(t)
torch_flatten(t, start_dim=2)
}
Flip
+| input | +(Tensor) the input tensor. |
+
|---|---|
| dims | +(a list or tuple) axis to flip on |
+
Reverse the order of a n-D tensor along given axis in dims.
if (torch_is_installed()) {

x = torch_arange(0, 8)$view(c(2, 2, 2))
x
torch_flip(x, c(1, 2))
}
Floor
+| input | +(Tensor) the input tensor. |
+
|---|---|
| out | +(Tensor, optional) the output tensor. |
+
Returns a new tensor with the floor of the elements of input,
+the largest integer less than or equal to each element.
$$ + \mbox{out}_{i} = \left\lfloor \mbox{input}_{i} \right\rfloor +$$
+ ++
R/gen-namespace-docs.R, R/gen-namespace-examples.R
+ torch_floor_divide.RdFloor_divide
+| input | +(Tensor) the numerator tensor |
+
|---|---|
| other | +(Tensor or Scalar) the denominator |
+
Return the division of the inputs rounded down to the nearest integer. See torch_div
+for type promotion and broadcasting rules.
$$ + \mbox{{out}}_i = \left\lfloor \frac{{\mbox{{input}}_i}}{{\mbox{{other}}_i}} \right\rfloor +$$
if (torch_is_installed()) {

a = torch_tensor(c(4.0, 3.0))
b = torch_tensor(c(2.0, 2.0))
torch_floor_divide(a, b)
torch_floor_divide(a, 1.4)
}
Fmod
+| input | +(Tensor) the dividend |
+
|---|---|
| other | +(Tensor or float) the divisor, which may be either a number or a tensor of the same shape as the dividend |
+
| out | +(Tensor, optional) the output tensor. |
+
Computes the element-wise remainder of division.
+The dividend and divisor may be either integer or floating point
+numbers. The remainder has the same sign as the dividend input.
When other is a tensor, the shapes of input and
+other must be broadcastable .
+if (torch_is_installed()) { + +torch_fmod(torch_tensor(c(-3., -2, -1, 1, 2, 3)), 2) +torch_fmod(torch_tensor(c(1., 2, 3, 4, 5)), 1.5) +}
Full
+| size | +(int...) a list, tuple, or |
+
|---|---|
| fill_value | +NA the number to fill the output tensor with. |
+
| out | +(Tensor, optional) the output tensor. |
+
| dtype | +( |
+
| layout | +( |
+
| device | +( |
+
| requires_grad | +(bool, optional) If autograd should record operations on the returned tensor. Default: |
+
Returns a tensor of size size filled with fill_value.
In PyTorch 1.5 a bool or integral fill_value will produce a warning if
+dtype or out are not set.
+In a future PyTorch release, when dtype and out are not set
+a bool fill_value will return a tensor of torch.bool dtype,
+and an integral fill_value will return a tensor of torch.long dtype.
+
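A minimal illustrative call (shape and fill value chosen arbitrarily):
+if (torch_is_installed()) {
+torch_full(c(2, 3), 3.141592)
+}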
Full_like
+| input | +(Tensor) the size of |
+
|---|---|
| fill_value | +NA the number to fill the output tensor with. |
+
| dtype | +( |
+
| layout | +( |
+
| device | +( |
+
| requires_grad | +(bool, optional) If autograd should record operations on the returned tensor. Default: |
+
| memory_format | +( |
+
+Returns a tensor with the same size as input filled with fill_value.
+torch_full_like(input, fill_value) is equivalent to
+torch_full(input.size(), fill_value, dtype=input.dtype, layout=input.layout, device=input.device).
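A small usage sketch (shape and fill value chosen arbitrarily):
+if (torch_is_installed()) {
+x = torch_rand(c(2, 3))
+torch_full_like(x, 7)
+}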
Gather
+| input | +(Tensor) the source tensor |
+
|---|---|
| dim | +(int) the axis along which to index |
+
| index | +(LongTensor) the indices of elements to gather |
+
| out | +(Tensor, optional) the destination tensor |
+
| sparse_grad | +(bool,optional) If |
+
Gathers values along an axis specified by dim.
For a 3-D tensor the output is specified by:
out[i][j][k] = input[index[i][j][k]][j][k] # if dim == 0 +out[i][j][k] = input[i][index[i][j][k]][k] # if dim == 1 +out[i][j][k] = input[i][j][index[i][j][k]] # if dim == 2+ +
If input is an n-dimensional tensor with size
+\((x_0, x_1..., x_{i-1}, x_i, x_{i+1}, ..., x_{n-1})\)
+and dim = i, then index must be an \(n\)-dimensional tensor with
+size \((x_0, x_1, ..., x_{i-1}, y, x_{i+1}, ..., x_{n-1})\) where \(y \geq 1\)
+and out will have the same size as index.
+if (torch_is_installed()) { + +t = torch_tensor(matrix(c(1,2,3,4), ncol = 2, byrow = TRUE)) +torch_gather(t, 2, torch_tensor(matrix(c(1,1,2,1), ncol = 2, byrow=TRUE), dtype = torch_int64())) +}
Ge
+| input | +(Tensor) the tensor to compare |
+
|---|---|
| other | +(Tensor or float) the tensor or value to compare |
+
| out | +(Tensor, optional) the output tensor that must be a |
+
Computes \(\mbox{input} \geq \mbox{other}\) element-wise.
+The second argument can be a number or a tensor whose shape is +broadcastable with the first argument.
+ ++if (torch_is_installed()) { + +torch_ge(torch_tensor(matrix(1:4, ncol = 2, byrow=TRUE)), + torch_tensor(matrix(c(1,1,4,4), ncol = 2, byrow=TRUE))) +}
A torch_generator is an object which manages the state of the algorithm
+that produces pseudo random numbers. Used as a keyword argument in many
+In-place random sampling functions.
torch_generator()
+
+
+
+ +if (torch_is_installed()) {
+
+# create a generator and inspect / update its seed
+generator <- torch_generator()
+generator$current_seed()
+generator$set_current_seed(1234567L)
+generator$current_seed()
+
+
+}
Geqrf
+| input | +(Tensor) the input matrix |
+
|---|---|
| out | +(tuple, optional) the output tuple of (Tensor, Tensor) |
+
This is a low-level function for calling LAPACK directly. This function
+returns a namedtuple (a, tau) as defined in LAPACK documentation for geqrf_ .
You'll generally want to use torch_qr instead.
Computes a QR decomposition of input, but without constructing
+\(Q\) and \(R\) as explicit separate matrices.
Rather, this directly calls the underlying LAPACK function ?geqrf
+which produces a sequence of 'elementary reflectors'.
See LAPACK documentation for geqrf_ for further details.
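A minimal sketch of this low-level call (a random square matrix is used purely for illustration):
+if (torch_is_installed()) {
+a = torch_randn(c(3, 3))
+out = torch_geqrf(a)
+out[[1]] # packed factorization
+out[[2]] # tau (elementary reflector coefficients)
+}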
Ger
+| input | +(Tensor) 1-D input vector |
+
|---|---|
| vec2 | +(Tensor) 1-D input vector |
+
| out | +(Tensor, optional) optional output matrix |
+
This function does not broadcast .
+Outer product of input and vec2.
+If input is a vector of size \(n\) and vec2 is a vector of
+size \(m\), then out must be a matrix of size \((n \times m)\).
+if (torch_is_installed()) { + +v1 = torch_arange(1., 5.) +v2 = torch_arange(1., 4.) +torch_ger(v1, v2) +}
Gt
+| input | +(Tensor) the tensor to compare |
+
|---|---|
| other | +(Tensor or float) the tensor or value to compare |
+
| out | +(Tensor, optional) the output tensor that must be a |
+
Computes \(\mbox{input} > \mbox{other}\) element-wise.
+The second argument can be a number or a tensor whose shape is +broadcastable with the first argument.
+ ++if (torch_is_installed()) { + +torch_gt(torch_tensor(matrix(1:4, ncol = 2, byrow=TRUE)), + torch_tensor(matrix(c(1,1,4,4), ncol = 2, byrow=TRUE))) +}
R/gen-namespace-docs.R, R/gen-namespace-examples.R
+ torch_hamming_window.RdHamming_window
+| window_length | +(int) the size of returned window |
+
|---|---|
| periodic | +(bool, optional) If True, returns a window to be used as periodic function. If False, return a symmetric window. |
+
| alpha | +(float, optional) The coefficient \(\alpha\) in the equation above |
+
| beta | +(float, optional) The coefficient \(\beta\) in the equation above |
+
| dtype | +( |
+
| layout | +( |
+
| device | +( |
+
| requires_grad | +(bool, optional) If autograd should record operations on the returned tensor. Default: |
+
If `window_length` \eqn{=1}, the returned window contains a single value 1.
+
+
+This is a generalized version of `torch_hann_window`. ++ +
Hamming window function.
+$$ + w[n] = \alpha - \beta\ \cos \left( \frac{2 \pi n}{N - 1} \right), +$$ +where \(N\) is the full window size.
+The input window_length is a positive integer controlling the
+returned window size. periodic flag determines whether the returned
+window trims off the last duplicate value from the symmetric window and is
+ready to be used as a periodic window with functions like
+torch_stft. Therefore, if periodic is true, the \(N\) in
+above formula is in fact \(\mbox{window\_length} + 1\). Also, we always have
+torch_hamming_window(L, periodic=True) equal to
+torch_hamming_window(L + 1, periodic=False)[:-1].
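A minimal illustrative call (window length chosen arbitrarily):
+if (torch_is_installed()) {
+torch_hamming_window(10)
+torch_hamming_window(10, periodic = FALSE)
+}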
Hann_window
+| window_length | +(int) the size of returned window |
+
|---|---|
| periodic | +(bool, optional) If True, returns a window to be used as periodic function. If False, return a symmetric window. |
+
| dtype | +( |
+
| layout | +( |
+
| device | +( |
+
| requires_grad | +(bool, optional) If autograd should record operations on the returned tensor. Default: |
+
If `window_length` \eqn{=1}, the returned window contains a single value 1.
+
+
+ Hann window function.
+$$ + w[n] = \frac{1}{2}\ \left[1 - \cos \left( \frac{2 \pi n}{N - 1} \right)\right] = + \sin^2 \left( \frac{\pi n}{N - 1} \right), +$$ +where \(N\) is the full window size.
+The input window_length is a positive integer controlling the
+returned window size. periodic flag determines whether the returned
+window trims off the last duplicate value from the symmetric window and is
+ready to be used as a periodic window with functions like
+torch_stft. Therefore, if periodic is true, the \(N\) in
+above formula is in fact \(\mbox{window\_length} + 1\). Also, we always have
+torch_hann_window(L, periodic=True) equal to
+torch_hann_window(L + 1, periodic=False)[:-1].
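An analogous illustrative call (window length chosen arbitrarily):
+if (torch_is_installed()) {
+torch_hann_window(10)
+torch_hann_window(10, periodic = FALSE)
+}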
Histc
+| input | +(Tensor) the input tensor. |
+
|---|---|
| bins | +(int) number of histogram bins |
+
| min | +(int) lower end of the range (inclusive) |
+
| max | +(int) upper end of the range (inclusive) |
+
| out | +(Tensor, optional) the output tensor. |
+
Computes the histogram of a tensor.
+The elements are sorted into equal width bins between min and
+max. If min and max are both zero, the minimum and
+maximum values of the data are used.
+
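A small usage sketch (bin settings are arbitrary):
+if (torch_is_installed()) {
+torch_histc(torch_tensor(c(1, 2, 1)), bins = 4, min = 0, max = 3)
+}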
Ifft
+| input | +(Tensor) the input tensor of at least |
+
|---|---|
| signal_ndim | +(int) the number of dimensions in each signal. |
+
| normalized | +(bool, optional) controls whether to return normalized results. Default: |
+
For CUDA tensors, an LRU cache is used for cuFFT plans to speed up +repeatedly running FFT methods on tensors of same geometry with same +configuration. See cufft-plan-cache for more details on how to +monitor and control the cache. ++ +
Complex-to-complex Inverse Discrete Fourier Transform
+This method computes the complex-to-complex inverse discrete Fourier +transform. Ignoring the batch dimensions, it computes the following +expression:
+$$
+ X[\omega_1, \dots, \omega_d] =
+ \frac{1}{\prod_{i=1}^d N_i} \sum_{n_1=0}^{N_1-1} \dots \sum_{n_d=0}^{N_d-1} x[n_1, \dots, n_d]
+ e^{\ j\ 2 \pi \sum_{i=0}^d \frac{\omega_i n_i}{N_i}},
+$$
+where \(d\) = signal_ndim is the number of dimensions of the
+signal, and \(N_i\) is the size of signal dimension \(i\).
The argument specifications are almost identical with torch_fft.
+However, if normalized is set to True, this instead returns the
+results multiplied by \(\sqrt{\prod_{i=1}^d N_i}\), to become a unitary
+operator. Therefore, to invert a torch_fft, the normalized
+argument should be set identically for torch_fft.
Returns the real and the imaginary parts together as one tensor of the same
+shape of input.
The inverse of this function is torch_fft.
For CPU tensors, this method is currently only available with MKL. Use
+torch_backends.mkl.is_available to check if MKL is installed.
+if (torch_is_installed()) { + +x = torch_randn(c(3, 3, 2)) +x +y = torch_fft(x, 2) +torch_ifft(y, 2) # recover x +}
Imag
+| input | +(Tensor) the input tensor. |
+
|---|---|
| out | +(Tensor, optional) the output tensor. |
+
Returns the imaginary part of the input tensor.
Not yet implemented.
+$$ + \mbox{out}_{i} = imag(\mbox{input}_{i}) +$$
+ ++if (torch_is_installed()) { +if (FALSE) { +torch_imag(torch_tensor(c(-1 + 1i, -2 + 2i, 3 - 3i))) +} +}
R/gen-namespace-docs.R, R/gen-namespace-examples.R
+ torch_index_select.RdIndex_select
+| input | +(Tensor) the input tensor. |
+
|---|---|
| dim | +(int) the dimension in which we index |
+
| index | +(LongTensor) the 1-D tensor containing the indices to index |
+
| out | +(Tensor, optional) the output tensor. |
+
The returned tensor does not use the same storage as the original
+tensor. If out has a different shape than expected, we
+silently change it to the correct shape, reallocating the underlying
+storage if necessary.
Returns a new tensor which indexes the input tensor along dimension
+dim using the entries in index which is a LongTensor.
The returned tensor has the same number of dimensions as the original tensor
+(input). The dim-th dimension has the same size as the length
+of index; other dimensions have the same size as in the original tensor.
+if (torch_is_installed()) { + +x = torch_randn(c(3, 4)) +x +indices = torch_tensor(c(1, 3), dtype = torch_int64()) +torch_index_select(x, 1, indices) +torch_index_select(x, 2, indices) +}
Inverse
+| input | +(Tensor) the input tensor of size \((*, n, n)\) where |
+
|---|---|
| out | +(Tensor, optional) the output tensor. |
+
Irrespective of the original strides, the returned tensors will be +transposed, i.e. with strides like `input.contiguous().transpose(-2, -1).stride()` ++ +
Takes the inverse of the square matrix input. input can be batches
+of 2D square tensors, in which case this function would return a tensor composed of
+individual inverses.
+if (torch_is_installed()) { +if (FALSE) { +x = torch_rand(c(4, 4)) +y = torch_inverse(x) +z = torch_mm(x, y) +z +torch_max(torch_abs(z - torch_eye(4))) # Max non-zero +# Batched inverse example +x = torch_randn(c(2, 3, 4, 4)) +y = torch_inverse(x) +z = torch_matmul(x, y) +torch_max(torch_abs(z - torch_eye(4)$expand_as(x))) # Max non-zero +} +}
Irfft
+| input | +(Tensor) the input tensor of at least |
+
|---|---|
| signal_ndim | +(int) the number of dimensions in each signal. |
+
| normalized | +(bool, optional) controls whether to return normalized results. Default: |
+
| onesided | +(bool, optional) controls whether |
+
| signal_sizes | +(list or |
+
Due to the conjugate symmetry, `input` does not need to contain the full
+complex frequency values. Roughly half of the values will be sufficient, as
+is the case when `input` is given by [`~torch.rfft`] with
+``rfft(signal, onesided=True)``. In such a case, set the `onesided`
+argument of this method to ``True``. Moreover, the original signal shape
+information can sometimes be lost; optionally set `signal_sizes` to
+the size of the original signal (without the batch dimensions if in batched
+mode) to recover it with the correct shape.
+
+Therefore, to invert a [torch_rfft()], the `normalized` and
+`onesided` arguments should be set identically for [torch_irfft()],
+and preferably `signal_sizes` should be given to avoid a size mismatch. See the
+example below for a case of size mismatch.
+
+See [torch_rfft()] for details on conjugate symmetry.
The inverse of this function is torch_rfft().
For CUDA tensors, an LRU cache is used for cuFFT plans to speed up +repeatedly running FFT methods on tensors of same geometry with same +configuration. See cufft-plan-cache for more details on how to +monitor and control the cache. ++ +
Complex-to-real Inverse Discrete Fourier Transform
+This method computes the complex-to-real inverse discrete Fourier transform.
+It is mathematically equivalent with torch_ifft with differences only in
+formats of the input and output.
The argument specifications are almost identical with torch_ifft.
+Similar to torch_ifft, if normalized is set to True,
+this normalizes the result by multiplying it with
+\(\sqrt{\prod_{i=1}^K N_i}\) so that the operator is unitary, where
+\(N_i\) is the size of signal dimension \(i\).
Generally speaking, input to this function should contain values
+following conjugate symmetry. Note that even if onesided is
+True, often symmetry on some part is still needed. When this
+requirement is not satisfied, the behavior of torch_irfft is
+undefined. Since torch_autograd.gradcheck estimates numerical
+Jacobian with point perturbations, torch_irfft will almost
+certainly fail the check.
For CPU tensors, this method is currently only available with MKL. Use
+torch_backends.mkl.is_available to check if MKL is installed.
+if (torch_is_installed()) { + +x = torch_randn(c(4, 4)) +torch_rfft(x, 2, onesided=TRUE) +x = torch_randn(c(4, 5)) +torch_rfft(x, 2, onesided=TRUE) +y = torch_rfft(x, 2, onesided=TRUE) +torch_irfft(y, 2, onesided=TRUE, signal_sizes=c(4,5)) # recover x +}
R/gen-namespace-docs.R, R/gen-namespace-examples.R
+ torch_is_floating_point.RdIs_floating_point
+| input | +(Tensor) the PyTorch tensor to test |
+
|---|
Returns True if the data type of input is a floating point data type i.e.,
+one of torch_float64, torch_float32 and torch_float16.
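A minimal illustrative call:
+if (torch_is_installed()) {
+torch_is_floating_point(torch_tensor(c(1, 2, 3)))    # floating point by default
+torch_is_floating_point(torch_tensor(c(1L, 2L, 3L))) # integer tensor
+}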
Verifies if torch is installed
+torch_is_installed()
+
+
+
+ Kthvalue
+| input | +(Tensor) the input tensor. |
+
|---|---|
| k | +(int) k for the k-th smallest element |
+
| dim | +(int, optional) the dimension to find the kth value along |
+
| keepdim | +(bool) whether the output tensor has |
+
| out | +(tuple, optional) the output tuple of (Tensor, LongTensor) can be optionally given to be used as output buffers |
+
Returns a namedtuple (values, indices) where values is the k-th
+smallest element of each row of the input tensor in the given dimension
+dim. And indices is the index location of each element found.
If dim is not given, the last dimension of the input is chosen.
If keepdim is True, both the values and indices tensors
+are the same size as input, except in the dimension dim where
+they are of size 1. Otherwise, dim is squeezed
+(see torch_squeeze), resulting in both the values and
+indices tensors having 1 fewer dimension than the input tensor.
+if (torch_is_installed()) { + +x = torch_arange(1., 6.) +x +torch_kthvalue(x, 4) +x=torch_arange(1.,7.)$resize_(c(2,3)) +x +torch_kthvalue(x, 2, 1, TRUE) +}
Creates the corresponding layout
+torch_strided() + +torch_sparse_coo()+ + + +
Le
+| input | +(Tensor) the tensor to compare |
+
|---|---|
| other | +(Tensor or float) the tensor or value to compare |
+
| out | +(Tensor, optional) the output tensor that must be a |
+
Computes \(\mbox{input} \leq \mbox{other}\) element-wise.
+The second argument can be a number or a tensor whose shape is +broadcastable with the first argument.
+ ++if (torch_is_installed()) { + +torch_le(torch_tensor(matrix(1:4, ncol = 2, byrow=TRUE)), + torch_tensor(matrix(c(1,1,4,4), ncol = 2, byrow=TRUE))) +}
Lerp
+| input | +(Tensor) the tensor with the starting points |
+
|---|---|
| end | +(Tensor) the tensor with the ending points |
+
| weight | +(float or tensor) the weight for the interpolation formula |
+
| out | +(Tensor, optional) the output tensor. |
+
Does a linear interpolation of two tensors start (given by input) and end based
+on a scalar or tensor weight and returns the resulting out tensor.
$$
+ \mbox{out}_i = \mbox{start}_i + \mbox{weight}_i \times (\mbox{end}_i - \mbox{start}_i)
+$$
+The shapes of start and end must be
+broadcastable . If weight is a tensor, then
+the shapes of weight, start, and end must be broadcastable .
+if (torch_is_installed()) { + +start = torch_arange(1., 5.) +end = torch_empty(4)$fill_(10) +start +end +torch_lerp(start, end, 0.5) +torch_lerp(start, end, torch_full_like(start, 0.5)) +}
Lgamma
+| input | +(Tensor) the input tensor. |
+
|---|---|
| out | +(Tensor, optional) the output tensor. |
+
Computes the logarithm of the gamma function on input.
$$ + \mbox{out}_{i} = \log \Gamma(\mbox{input}_{i}) +$$
+ ++
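A small usage sketch (input values chosen arbitrarily):
+if (torch_is_installed()) {
+torch_lgamma(torch_arange(0.5, 2, 0.5))
+}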
Linspace
+| start | +(float) the starting value for the set of points |
+
|---|---|
| end | +(float) the ending value for the set of points |
+
| steps | +(int) number of points to sample between |
+
| out | +(Tensor, optional) the output tensor. |
+
| dtype | +( |
+
| layout | +( |
+
| device | +( |
+
| requires_grad | +(bool, optional) If autograd should record operations on the returned tensor. Default: |
+
Returns a one-dimensional tensor of steps
+equally spaced points between start and end.
The output tensor is 1-D of size steps.
+if (torch_is_installed()) { + +torch_linspace(3, 10, steps=5) +torch_linspace(-10, 10, steps=5) +torch_linspace(start=-10, end=10, steps=5) +torch_linspace(start=-10, end=10, steps=1) +}
Loads a saved object
+torch_load(path)+ +
| path | +a path to the saved object |
+
|---|
Other torch_save:
+torch_save()
Log10
+| input | +(Tensor) the input tensor. |
+
|---|---|
| out | +(Tensor, optional) the output tensor. |
+
Returns a new tensor with the logarithm to the base 10 of the elements
+of input.
$$ + y_{i} = \log_{10} (x_{i}) +$$
+ ++
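A minimal illustrative call:
+if (torch_is_installed()) {
+a = torch_rand(c(5))
+a
+torch_log10(a)
+}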
Log1p
+| input | +(Tensor) the input tensor. |
+
|---|---|
| out | +(Tensor, optional) the output tensor. |
+
This function is more accurate than torch_log for small
+values of input.
Returns a new tensor with the natural logarithm of (1 + input).
$$ + y_i = \log_{e} (x_i + 1) +$$
+ ++
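A minimal illustrative call:
+if (torch_is_installed()) {
+a = torch_rand(c(5))
+a
+torch_log1p(a)
+}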
Log2
+| input | +(Tensor) the input tensor. |
+
|---|---|
| out | +(Tensor, optional) the output tensor. |
+
Returns a new tensor with the logarithm to the base 2 of the elements
+of input.
$$ + y_{i} = \log_{2} (x_{i}) +$$
+ ++
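A minimal illustrative call:
+if (torch_is_installed()) {
+a = torch_rand(c(5))
+a
+torch_log2(a)
+}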
Logdet
+| input | +(Tensor) the input tensor of size |
+
|---|
Result is ``-inf`` if `input` has zero log determinant, and is ``nan`` if +`input` has negative determinant. ++ +
Backward through `logdet` internally uses SVD results when `input` +is not invertible. In this case, double backward through `logdet` will +be unstable in when `input` doesn't have distinct singular values. See +`~torch.svd` for details. ++ +
Calculates log determinant of a square matrix or batches of square matrices.
+ ++if (torch_is_installed()) { + +A = torch_randn(c(3, 3)) +torch_det(A) +torch_logdet(A) +A +A$det() +A$det()$log() +}
Logical_and
+| input | +(Tensor) the input tensor. |
+
|---|---|
| other | +(Tensor) the tensor to compute AND with |
+
| out | +(Tensor, optional) the output tensor. |
+
Computes the element-wise logical AND of the given input tensors. Zeros are treated as False and nonzeros are
+treated as True.
+if (torch_is_installed()) { + +torch_logical_and(torch_tensor(c(TRUE, FALSE, TRUE)), torch_tensor(c(TRUE, FALSE, FALSE))) +a = torch_tensor(c(0, 1, 10, 0), dtype=torch_int8()) +b = torch_tensor(c(4, 0, 1, 0), dtype=torch_int8()) +torch_logical_and(a, b) +if (FALSE) { +torch_logical_and(a, b, out=torch_empty(4, dtype=torch_bool())) +} +}
Logical_not
+| input | +(Tensor) the input tensor. |
+
|---|---|
| out | +(Tensor, optional) the output tensor. |
+
Computes the element-wise logical NOT of the given input tensor. If not specified, the output tensor will have the bool
+dtype. If the input tensor is not a bool tensor, zeros are treated as False and non-zeros are treated as True.
+if (torch_is_installed()) { + +torch_logical_not(torch_tensor(c(TRUE, FALSE))) +torch_logical_not(torch_tensor(c(0, 1, -10), dtype=torch_int8())) +torch_logical_not(torch_tensor(c(0., 1.5, -10.), dtype=torch_double())) +}
Logical_or
+| input | +(Tensor) the input tensor. |
+
|---|---|
| other | +(Tensor) the tensor to compute OR with |
+
| out | +(Tensor, optional) the output tensor. |
+
Computes the element-wise logical OR of the given input tensors. Zeros are treated as False and nonzeros are
+treated as True.
+if (torch_is_installed()) { + +torch_logical_or(torch_tensor(c(TRUE, FALSE, TRUE)), torch_tensor(c(TRUE, FALSE, FALSE))) +a = torch_tensor(c(0, 1, 10, 0), dtype=torch_int8()) +b = torch_tensor(c(4, 0, 1, 0), dtype=torch_int8()) +torch_logical_or(a, b) +if (FALSE) { +torch_logical_or(a$double(), b$double()) +torch_logical_or(a$double(), b) +torch_logical_or(a, b, out=torch_empty(4, dtype=torch_bool())) +} +}
Logical_xor
+| input | +(Tensor) the input tensor. |
+
|---|---|
| other | +(Tensor) the tensor to compute XOR with |
+
| out | +(Tensor, optional) the output tensor. |
+
Computes the element-wise logical XOR of the given input tensors. Zeros are treated as False and nonzeros are
+treated as True.
+if (torch_is_installed()) { + +torch_logical_xor(torch_tensor(c(TRUE, FALSE, TRUE)), torch_tensor(c(TRUE, FALSE, FALSE))) +a = torch_tensor(c(0, 1, 10, 0), dtype=torch_int8()) +b = torch_tensor(c(4, 0, 1, 0), dtype=torch_int8()) +torch_logical_xor(a, b) +torch_logical_xor(a$to(dtype=torch_double()), b$to(dtype=torch_double())) +torch_logical_xor(a$to(dtype=torch_double()), b) +}
Logspace
+| start | +(float) the starting value for the set of points |
+
|---|---|
| end | +(float) the ending value for the set of points |
+
| steps | +(int) number of points to sample between |
+
| base | +(float) base of the logarithm function. Default: |
+
| out | +(Tensor, optional) the output tensor. |
+
| dtype | +( |
+
| layout | +( |
+
| device | +( |
+
| requires_grad | +(bool, optional) If autograd should record operations on the returned tensor. Default: |
+
Returns a one-dimensional tensor of steps points
+logarithmically spaced with base base between
+\({\mbox{base}}^{\mbox{start}}\) and \({\mbox{base}}^{\mbox{end}}\).
The output tensor is 1-D of size steps.
+if (torch_is_installed()) { + +torch_logspace(start=-10, end=10, steps=5) +torch_logspace(start=0.1, end=1.0, steps=5) +torch_logspace(start=0.1, end=1.0, steps=1) +torch_logspace(start=2, end=2, steps=1, base=2) +}
Logsumexp
+| input | +(Tensor) the input tensor. |
+
|---|---|
| dim | +(int or tuple of ints) the dimension or dimensions to reduce. |
+
| keepdim | +(bool) whether the output tensor has |
+
| out | +(Tensor, optional) the output tensor. |
+
Returns the log of summed exponentials of each row of the input
+tensor in the given dimension dim. The computation is numerically
+stabilized.
For summation index \(j\) given by dim and other indices \(i\), the result is
$$ + \mbox{logsumexp}(x)_{i} = \log \sum_j \exp(x_{ij}) +$$
+If keepdim is True, the output tensor is of the same size
+as input except in the dimension(s) dim where it is of size 1.
+Otherwise, dim is squeezed (see torch_squeeze), resulting in the
+output tensor having 1 (or len(dim)) fewer dimension(s).
+
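A small usage sketch (shape and reduction dimension chosen arbitrarily):
+if (torch_is_installed()) {
+a = torch_randn(c(3, 3))
+torch_logsumexp(a, 1)
+}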
Lstsq
+| input | +(Tensor) the matrix \(B\) |
+
|---|---|
| A | +(Tensor) the \(m\) by \(n\) matrix \(A\) |
+
| out | +(tuple, optional) the optional destination tensor |
+
The case when \eqn{m < n} is not supported on the GPU.
+
+
+ Computes the solution to the least squares and least norm problems for a full +rank matrix \(A\) of size \((m \times n)\) and a matrix \(B\) of +size \((m \times k)\).
+If \(m \geq n\), torch_lstsq() solves the least-squares problem:
$$
+ \begin{array}{ll}
+ \min_X & \|AX-B\|_2.
+ \end{array}
+$$
+If \(m < n\), torch_lstsq() solves the least-norm problem:
$$ + \begin{array}{llll} + \min_X & \|X\|_2 & \mbox{subject to} & AX = B. + \end{array} +$$ +Returned tensor \(X\) has shape \((\mbox{max}(m, n) \times k)\). The first \(n\) +rows of \(X\) contains the solution. If \(m \geq n\), the residual sum of squares +for the solution in each column is given by the sum of squares of elements in the +remaining \(m - n\) rows of that column.
+ ++if (torch_is_installed()) { + +A = torch_tensor(rbind( + c(1,1,1), + c(2,3,4), + c(3,5,2), + c(4,2,5), + c(5,4,3) +)) +B = torch_tensor(rbind( + c(-10, -3), + c(12, 14), + c(14, 12), + c(16, 16), + c(18, 16) +)) +out = torch_lstsq(B, A) +out[[1]] +}
Lt
+| input | +(Tensor) the tensor to compare |
+
|---|---|
| other | +(Tensor or float) the tensor or value to compare |
+
| out | +(Tensor, optional) the output tensor that must be a |
+
Computes \(\mbox{input} < \mbox{other}\) element-wise.
+The second argument can be a number or a tensor whose shape is +broadcastable with the first argument.
+ ++if (torch_is_installed()) { + +torch_lt(torch_tensor(matrix(1:4, ncol = 2, byrow=TRUE)), + torch_tensor(matrix(c(1,1,4,4), ncol = 2, byrow=TRUE))) +}
Computes the LU factorization of a matrix or batches of matrices A. Returns a +tuple containing the LU factorization and pivots of A. Pivoting is done if pivot +is set to True.
+torch_lu(A, pivot = TRUE, get_infos = FALSE, out = NULL)+ +
| A | +(Tensor) the tensor to factor of size (, m, n)(,m,n) |
+
|---|---|
| pivot | +(bool, optional) – controls whether pivoting is done. Default: TRUE |
+
| get_infos | +(bool, optional) – if set to True, returns an info IntTensor. Default: FALSE |
+
| out | +(tuple, optional) – optional output tuple. If get_infos is True, then the elements +in the tuple are Tensor, IntTensor, and IntTensor. If get_infos is False, then the +elements in the tuple are Tensor, IntTensor. Default: NULL |
+
+
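A small usage sketch (a random batch of square matrices, as in the torch_lu_solve example below):
+if (torch_is_installed()) {
+A = torch_randn(c(2, 3, 3))
+out = torch_lu(A)
+out[[1]] # LU factorization
+out[[2]] # pivots
+}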
Lu_solve
+| b | +(Tensor) the RHS tensor of size \((*, m, k)\), where \(*\) is zero or more batch dimensions. |
+
|---|---|
| LU_data | +(Tensor) the pivoted LU factorization of A from |
+
| LU_pivots | +(IntTensor) the pivots of the LU factorization from |
+
| out | +(Tensor, optional) the output tensor. |
+
Returns the LU solve of the linear system \(Ax = b\) using the partially pivoted
+LU factorization of A from torch_lu.
+if (torch_is_installed()) { +A = torch_randn(c(2, 3, 3)) +b = torch_randn(c(2, 3, 1)) +out = torch_lu(A) +x = torch_lu_solve(b, out[[1]], out[[2]]) +torch_norm(torch_bmm(A, x) - b) +}
R/gen-namespace-docs.R, R/gen-namespace-examples.R
+ torch_masked_select.RdMasked_select
+| input | +(Tensor) the input tensor. |
+
|---|---|
| mask | +(BoolTensor) the tensor containing the binary mask to index with |
+
| out | +(Tensor, optional) the output tensor. |
+
The returned tensor does not use the same storage +as the original tensor
+Returns a new 1-D tensor which indexes the input tensor according to
+the boolean mask mask which is a BoolTensor.
The shapes of the mask tensor and the input tensor don't need
+to match, but they must be broadcastable .
+if (torch_is_installed()) { + +x = torch_randn(c(3, 4)) +x +mask = x$ge(0.5) +mask +torch_masked_select(x, mask) +}
Matmul
+| input | +(Tensor) the first tensor to be multiplied |
+
|---|---|
| other | +(Tensor) the second tensor to be multiplied |
+
| out | +(Tensor, optional) the output tensor. |
+
The 1-dimensional dot product version of this function does not support an `out` parameter. ++ +
Matrix product of two tensors.
+The behavior depends on the dimensionality of the tensors as follows:
If both tensors are 1-dimensional, the dot product (scalar) is returned.
If both arguments are 2-dimensional, the matrix-matrix product is returned.
If the first argument is 1-dimensional and the second argument is 2-dimensional, +a 1 is prepended to its dimension for the purpose of the matrix multiply. +After the matrix multiply, the prepended dimension is removed.
If the first argument is 2-dimensional and the second argument is 1-dimensional, +the matrix-vector product is returned.
If both arguments are at least 1-dimensional and at least one argument is
+N-dimensional (where N > 2), then a batched matrix multiply is returned. If the first
+argument is 1-dimensional, a 1 is prepended to its dimension for the purpose of the
+batched matrix multiply and removed after. If the second argument is 1-dimensional, a
+1 is appended to its dimension for the purpose of the batched matrix multiply and removed after.
+The non-matrix (i.e. batch) dimensions are broadcasted (and thus
+must be broadcastable). For example, if input is a
+\((j \times 1 \times n \times m)\) tensor and other is a \((k \times m \times p)\)
+tensor, out will be an \((j \times k \times n \times p)\) tensor.
+if (torch_is_installed()) { + +# vector x vector +tensor1 = torch_randn(c(3)) +tensor2 = torch_randn(c(3)) +torch_matmul(tensor1, tensor2) +# matrix x vector +tensor1 = torch_randn(c(3, 4)) +tensor2 = torch_randn(c(4)) +torch_matmul(tensor1, tensor2) +# batched matrix x broadcasted vector +tensor1 = torch_randn(c(10, 3, 4)) +tensor2 = torch_randn(c(4)) +torch_matmul(tensor1, tensor2) +# batched matrix x batched matrix +tensor1 = torch_randn(c(10, 3, 4)) +tensor2 = torch_randn(c(10, 4, 5)) +torch_matmul(tensor1, tensor2) +# batched matrix x broadcasted matrix +tensor1 = torch_randn(c(10, 3, 4)) +tensor2 = torch_randn(c(4, 5)) +torch_matmul(tensor1, tensor2) +}
R/gen-namespace-docs.R, R/gen-namespace-examples.R
+ torch_matrix_power.RdMatrix_power
+| input | +(Tensor) the input tensor. |
+
|---|---|
| n | +(int) the power to raise the matrix to |
+
Returns the matrix raised to the power n for square matrices.
+For batch of matrices, each individual matrix is raised to the power n.
If n is negative, then the inverse of the matrix (if invertible) is
+raised to the power n. For a batch of matrices, the batched inverse
+(if invertible) is raised to the power n. If n is 0, then an identity matrix
+is returned.
+
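A minimal illustrative call (matrix and exponent chosen arbitrarily):
+if (torch_is_installed()) {
+a = torch_randn(c(2, 2))
+torch_matrix_power(a, 2)
+}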
Matrix_rank
+| input | +(Tensor) the input 2-D tensor |
+
|---|---|
| tol | +(float, optional) the tolerance value. Default: |
+
| symmetric | +(bool, optional) indicates whether |
+
Returns the numerical rank of a 2-D tensor. The method to compute the
+matrix rank is done using SVD by default. If symmetric is True,
+then input is assumed to be symmetric, and the computation of the
+rank is done by obtaining the eigenvalues.
tol is the threshold below which the singular values (or the eigenvalues
+when symmetric is True) are considered to be 0. If tol is not
+specified, tol is set to S.max() * max(S.size()) * eps where S is the
+singular values (or the eigenvalues when symmetric is True), and eps
+is the epsilon value for the datatype of input.
+
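A minimal illustrative call:
+if (torch_is_installed()) {
+torch_matrix_rank(torch_eye(10))
+}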
Max
+| input | +(Tensor) the input tensor. |
+
|---|---|
| dim | +(int) the dimension to reduce. |
+
| keepdim | +(bool) whether the output tensor has |
+
| out | +(tuple, optional) the result tuple of two output tensors (max, max_indices) |
+
| other | +(Tensor) the second input tensor |
+
When the shapes do not match, the shape of the returned output tensor +follows the broadcasting rules .
+Returns the maximum value of all elements in the input tensor.
Returns a namedtuple (values, indices) where values is the maximum
+value of each row of the input tensor in the given dimension
+dim. And indices is the index location of each maximum value found
+(argmax).
indices does not necessarily contain the first occurrence of each
+maximal value found, unless it is unique.
+The exact implementation details are device-specific.
+Do not expect the same result when run on CPU and GPU in general.
If keepdim is True, the output tensors are of the same size
+as input except in the dimension dim where they are of size 1.
+Otherwise, dim is squeezed (see torch_squeeze), resulting
+in the output tensors having 1 fewer dimension than input.
Each element of the tensor input is compared with the corresponding
+element of the tensor other and an element-wise maximum is taken.
The shapes of input and other don't need to match,
+but they must be broadcastable .
$$ + \mbox{out}_i = \max(\mbox{tensor}_i, \mbox{other}_i) +$$
+ ++if (torch_is_installed()) { + +a = torch_randn(c(1, 3)) +a +torch_max(a) + + +a = torch_randn(c(4, 4)) +a +torch_max(a, dim = 1) + + +a = torch_randn(c(4)) +a +b = torch_randn(c(4)) +b +torch_max(a, other = b) +}
Mean
+| input | +(Tensor) the input tensor. |
+
|---|---|
| dim | +(int or tuple of ints) the dimension or dimensions to reduce. |
+
| keepdim | +(bool) whether the output tensor has |
+
| out | +(Tensor, optional) the output tensor. |
+
Returns the mean value of all elements in the input tensor.
Returns the mean value of each row of the input tensor in the given
+dimension dim. If dim is a list of dimensions,
+reduce over all of them.
If keepdim is True, the output tensor is of the same size
+as input except in the dimension(s) dim where it is of size 1.
+Otherwise, dim is squeezed (see torch_squeeze), resulting in the
+output tensor having 1 (or len(dim)) fewer dimension(s).
+if (torch_is_installed()) { + +a = torch_randn(c(1, 3)) +a +torch_mean(a) + + +a = torch_randn(c(4, 4)) +a +torch_mean(a, 1) +torch_mean(a, 1, TRUE) +}
Median
+| input | +(Tensor) the input tensor. |
+
|---|---|
| dim | +(int) the dimension to reduce. |
+
| keepdim | +(bool) whether the output tensor has |
+
| out | +(tuple, optional) the result tuple of two output tensors (max, max_indices) |
+
Returns the median value of all elements in the input tensor.
Returns a namedtuple (values, indices) where values is the median
+value of each row of the input tensor in the given dimension
+dim. And indices is the index location of each median value found.
By default, dim is the last dimension of the input tensor.
If keepdim is True, the output tensors are of the same size
+as input except in the dimension dim where they are of size 1.
+Otherwise, dim is squeezed (see torch_squeeze), resulting in
+the output tensors having 1 fewer dimension than input.
+if (torch_is_installed()) { + +a = torch_randn(c(1, 3)) +a +torch_median(a) + + +a = torch_randn(c(4, 5)) +a +torch_median(a, 1) +}
Returns the correspondent memory format.
+torch_contiguous_format() + +torch_preserve_format() + +torch_channels_last_format()+ + + +
Meshgrid
| tensors | +(list of Tensor) list of scalars or 1 dimensional tensors. Scalars will be treated as tensors of size (1,). |
+
|---|---|
Take \(N\) tensors, each of which can be either scalar or 1-dimensional
+vector, and create \(N\) N-dimensional grids, where the \(i\) th grid is defined by
+expanding the \(i\) th input over dimensions defined by other inputs.
+if (torch_is_installed()) { + +x = torch_tensor(c(1, 2, 3)) +y = torch_tensor(c(4, 5, 6)) +out = torch_meshgrid(list(x, y)) +out +}
Min
+| input | +(Tensor) the input tensor. |
+
|---|---|
| dim | +(int) the dimension to reduce. |
+
| keepdim | +(bool) whether the output tensor has |
+
| out | +(tuple, optional) the tuple of two output tensors (min, min_indices) |
+
| other | +(Tensor) the second input tensor |
+
When the shapes do not match, the shape of the returned output tensor +follows the broadcasting rules .
+Returns the minimum value of all elements in the input tensor.
Returns a namedtuple (values, indices) where values is the minimum
+value of each row of the input tensor in the given dimension
+dim. And indices is the index location of each minimum value found
+(argmin).
indices does not necessarily contain the first occurrence of each
+minimal value found, unless it is unique.
+The exact implementation details are device-specific.
+Do not expect the same result when run on CPU and GPU in general.
If keepdim is True, the output tensors are of the same size as
+input except in the dimension dim where they are of size 1.
+Otherwise, dim is squeezed (see torch_squeeze), resulting in
+the output tensors having 1 fewer dimension than input.
Each element of the tensor input is compared with the corresponding
+element of the tensor other and an element-wise minimum is taken.
+The resulting tensor is returned.
The shapes of input and other don't need to match,
+but they must be broadcastable .
$$ + \mbox{out}_i = \min(\mbox{tensor}_i, \mbox{other}_i) +$$
+ ++if (torch_is_installed()) { + +a = torch_randn(c(1, 3)) +a +torch_min(a) + + +a = torch_randn(c(4, 4)) +a +torch_min(a, dim = 1) + + +a = torch_randn(c(4)) +a +b = torch_randn(c(4)) +b +torch_min(a, other = b) +}
Mm
+| input | +(Tensor) the first matrix to be multiplied |
+
|---|---|
| mat2 | +(Tensor) the second matrix to be multiplied |
+
| out | +(Tensor, optional) the output tensor. |
+
This function does not broadcast .
+For broadcasting matrix products, see torch_matmul.
Performs a matrix multiplication of the matrices input and mat2.
If input is a \((n \times m)\) tensor, mat2 is a
+\((m \times p)\) tensor, out will be a \((n \times p)\) tensor.
+if (torch_is_installed()) { + +mat1 = torch_randn(c(2, 3)) +mat2 = torch_randn(c(3, 3)) +torch_mm(mat1, mat2) +}
Mode
+| input | +(Tensor) the input tensor. |
+
|---|---|
| dim | +(int) the dimension to reduce. |
+
| keepdim | +(bool) whether the output tensor has |
+
| out | +(tuple, optional) the result tuple of two output tensors (values, indices) |
+
This function is not defined for torch_cuda.Tensor yet.
Returns a namedtuple (values, indices) where values is the mode
+value of each row of the input tensor in the given dimension
+dim, i.e. a value which appears most often
+in that row, and indices is the index location of each mode value found.
By default, dim is the last dimension of the input tensor.
If keepdim is True, the output tensors are of the same size as
+input except in the dimension dim where they are of size 1.
+Otherwise, dim is squeezed (see torch_squeeze), resulting
+in the output tensors having 1 fewer dimension than input.
+
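A small usage sketch (input values chosen so that the mode is unambiguous):
+if (torch_is_installed()) {
+a = torch_tensor(c(0, 0, 0, 2, 2, 3))
+torch_mode(a, dim = 1)
+}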
Mul
+| input | +NA |
+
|---|---|
| value | +(Number) the number to be multiplied to each element of |
+
| out | +NA |
+
| input | +(Tensor) the first multiplicand tensor |
+
| other | +(Tensor) the second multiplicand tensor |
+
| out | +(Tensor, optional) the output tensor. |
+
Multiplies each element of the input input with the scalar
+other and returns a new resulting tensor.
$$
+ \mbox{out}_i = \mbox{other} \times \mbox{input}_i
+$$
+If input is of type FloatTensor or DoubleTensor, other
+should be a real number, otherwise it should be an integer.
Each element of the tensor input is multiplied by the corresponding
+element of the Tensor other. The resulting tensor is returned.
The shapes of input and other must be
+broadcastable .
$$ + \mbox{out}_i = \mbox{input}_i \times \mbox{other}_i +$$
+ ++if (torch_is_installed()) { + +a = torch_randn(c(3)) +a +torch_mul(a, 100) + + +a = torch_randn(c(4, 1)) +a +b = torch_randn(c(1, 4)) +b +torch_mul(a, b) +}
Multinomial
+| input | +(Tensor) the input tensor containing probabilities |
+
|---|---|
| num_samples | +(int) number of samples to draw |
+
| replacement | +(bool, optional) whether to draw with replacement or not |
+
| generator | +( |
+
| out | +(Tensor, optional) the output tensor. |
+
The rows of `input` do not need to sum to one (in which case we use +the values as weights), but must be non-negative, finite and have +a non-zero sum. ++ +
Indices are ordered from left to right according to when each was sampled +(first samples are placed in first column).
+If input is a vector, out is a vector of size num_samples.
If input is a matrix with m rows, out is a matrix of shape
+\((m \times \mbox{num\_samples})\).
If replacement is True, samples are drawn with replacement.
If not, they are drawn without replacement, which means that when a +sample index is drawn for a row, it cannot be drawn again for that row.
+When drawn without replacement, `num_samples` must be lower than +number of non-zero elements in `input` (or the min number of non-zero +elements in each row of `input` if it is a matrix). ++ +
Returns a tensor where each row contains num_samples indices sampled
+from the multinomial probability distribution located in the corresponding row
+of tensor input.
+if (torch_is_installed()) { + +weights = torch_tensor(c(0, 10, 3, 0), dtype=torch_float()) # create a tensor of weights +torch_multinomial(weights, 2) +torch_multinomial(weights, 4, replacement=TRUE) +}
Mv
+| input | +(Tensor) matrix to be multiplied |
+
|---|---|
| vec | +(Tensor) vector to be multiplied |
+
| out | +(Tensor, optional) the output tensor. |
+
This function does not broadcast .
+Performs a matrix-vector product of the matrix input and the vector
+vec.
If input is a \((n \times m)\) tensor, vec is a 1-D tensor of
+size \(m\), out will be 1-D of size \(n\).
+if (torch_is_installed()) { + +mat = torch_randn(c(2, 3)) +vec = torch_randn(c(3)) +torch_mv(mat, vec) +}
Mvlgamma
+| input | +(Tensor) the tensor to compute the multivariate log-gamma function |
+
|---|---|
| p | +(int) the number of dimensions |
+
Computes the multivariate log-gamma function (https://en.wikipedia.org/wiki/Multivariate_gamma_function) with dimension
+\(p\) element-wise, given by
$$ + \log(\Gamma_{p}(a)) = C + \displaystyle \sum_{i=1}^{p} \log\left(\Gamma\left(a - \frac{i - 1}{2}\right)\right) +$$ +where \(C = \log(\pi) \times \frac{p (p - 1)}{4}\) and \(\Gamma(\cdot)\) is the Gamma function.
+All elements must be greater than \(\frac{p - 1}{2}\), otherwise an error would be thrown.
+ ++
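A small usage sketch (values drawn above the \((p - 1)/2\) threshold; here \(p = 2\)):
+if (torch_is_installed()) {
+a = torch_empty(c(2, 3))$uniform_(1, 2)
+torch_mvlgamma(a, 2)
+}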
Narrow
+| input | +(Tensor) the tensor to narrow |
+
|---|---|
| dim | +(int) the dimension along which to narrow |
+
| start | +(int) the starting dimension |
+
| length | +(int) the distance to the ending dimension |
+
Returns a new tensor that is a narrowed version of input tensor. The
+dimension dim is input from start to start + length. The
+returned tensor and input tensor share the same underlying storage.
+if (torch_is_installed()) { + +x = torch_tensor(matrix(c(1:9), ncol = 3, byrow= TRUE)) +torch_narrow(x, 1, torch_tensor(0L)$sum(dim = 1), 2) +torch_narrow(x, 2, torch_tensor(1L)$sum(dim = 1), 2) +}
Ne
+| input | +(Tensor) the tensor to compare |
+
|---|---|
| other | +(Tensor or float) the tensor or value to compare |
+
| out | +(Tensor, optional) the output tensor that must be a |
+
Computes \(input \neq other\) element-wise.
+The second argument can be a number or a tensor whose shape is +broadcastable with the first argument.
+ ++if (torch_is_installed()) { + +torch_ne(torch_tensor(matrix(1:4, ncol = 2, byrow=TRUE)), + torch_tensor(matrix(rep(c(1,4), each = 2), ncol = 2, byrow=TRUE))) +}
Nonzero
+| input | +(Tensor) the input tensor. |
+
|---|---|
| out | +(LongTensor, optional) the output tensor containing indices |
+
[`torch_nonzero(..., as_tuple=False) <torch.nonzero>`] (default) returns a +2-D tensor where each row is the index for a nonzero value. + +[`torch_nonzero(..., as_tuple=True) <torch.nonzero>`] returns a tuple of 1-D +index tensors, allowing for advanced indexing, so ``x[x.nonzero(as_tuple=True)]`` +gives all nonzero values of tensor ``x``. Of the returned tuple, each index tensor +contains nonzero indices for a certain dimension. + +See below for more details on the two behaviors. ++ +
When as_tuple is False (default):
Returns a tensor containing the indices of all non-zero elements of
+input. Each row in the result contains the indices of a non-zero
+element in input. The result is sorted lexicographically, with
+the last index changing the fastest (C-style).
If input has \(n\) dimensions, then the resulting indices tensor
+out is of size \((z \times n)\), where \(z\) is the total number of
+non-zero elements in the input tensor.
When as_tuple is True:
Returns a tuple of 1-D tensors, one for each dimension in input,
+each containing the indices (in that dimension) of all non-zero elements of
+input .
If input has \(n\) dimensions, then the resulting tuple contains \(n\)
+tensors of size \(z\), where \(z\) is the total number of
+non-zero elements in the input tensor.
As a special case, when input has zero dimensions and a nonzero scalar
+value, it is treated as a one-dimensional tensor with one element.
+
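A minimal illustrative call:
+if (torch_is_installed()) {
+torch_nonzero(torch_tensor(c(1, 1, 0, 1)))
+}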
Norm
+| input | +(Tensor) the input tensor |
+
|---|---|
| p | +(int, float, inf, -inf, 'fro', 'nuc', optional) the order of norm. Default: |
+
| dim | +(int, 2-tuple of ints, 2-list of ints, optional) If it is an int, vector norm will be calculated, if it is 2-tuple of ints, matrix norm will be calculated. If the value is None, matrix norm will be calculated when the input tensor only has two dimensions, vector norm will be calculated when the input tensor only has one dimension. If the input tensor has more than two dimensions, the vector norm will be applied to last dimension. |
+
| keepdim | +(bool, optional) whether the output tensors have |
+
| out | +(Tensor, optional) the output tensor. Ignored if |
+
| dtype | +( |
+
Returns the matrix norm or vector norm of a given tensor.
+ ++if (torch_is_installed()) { + +a = torch_arange(0, 9, dtype = torch_float()) +b = a$reshape(list(3, 3)) +torch_norm(a) +torch_norm(b) +torch_norm(a, Inf) +torch_norm(b, Inf) + +}
Normal
+| mean | +(Tensor) the tensor of per-element means |
+
|---|---|
| std | +(Tensor) the tensor of per-element standard deviations |
+
| generator | +( |
+
| out | +(Tensor, optional) the output tensor. |
+
| size | +(int...) a sequence of integers defining the shape of the output tensor. |
+
When the shapes do not match, the shape of mean
+is used as the shape for the returned output tensor
Returns a tensor of random numbers drawn from separate normal distributions +whose mean and standard deviation are given.
+The mean is a tensor with the mean of
+each output element's normal distribution
The std is a tensor with the standard deviation of
+each output element's normal distribution
The shapes of mean and std don't need to match, but the
+total number of elements in each tensor needs to be the same.
Similar to the function above, but the means are shared among all drawn +elements.
+Similar to the function above, but the standard-deviations are shared among +all drawn elements.
+Similar to the function above, but the means and standard deviations are shared
+among all drawn elements. The resulting tensor has size given by size.
+if (torch_is_installed()) { + +if (FALSE) { +torch_normal(mean=0, std=torch_arange(1, 0, -0.1)) + + +torch_normal(mean=0.5, std=torch_arange(1., 6.)) + + +torch_normal(mean=torch_arange(1., 6.)) + + +torch_normal(2, 3, size=list(1, 4)) +} +}
Ones
+| size | +(int...) a sequence of integers defining the shape of the output tensor. Can be a variable number of arguments or a collection like a list or tuple. |
+
|---|---|
| out | +(Tensor, optional) the output tensor. |
+
| dtype | +( |
+
| layout | +( |
+
| device | +( |
+
| requires_grad | +(bool, optional) If autograd should record operations on the returned tensor. Default: |
+
Returns a tensor filled with the scalar value 1, with the shape defined
+by the variable argument size.
+
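A minimal illustrative call:
+if (torch_is_installed()) {
+torch_ones(c(2, 3))
+}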
Ones_like
+| input | +(Tensor) the size of |
+
|---|---|
| dtype | +( |
+
| layout | +( |
+
| device | +( |
+
| requires_grad | +(bool, optional) If autograd should record operations on the returned tensor. Default: |
+
| memory_format | +( |
+
Returns a tensor filled with the scalar value 1, with the same size as
+input. torch_ones_like(input) is equivalent to
+torch_ones(input.size(), dtype=input.dtype, layout=input.layout, device=input.device).
As of 0.4, this function does not support an out keyword. As an alternative,
+the old torch_ones_like(input, out=output) is equivalent to
+torch_ones(input.size(), out=output).
+
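A minimal illustrative call:
+if (torch_is_installed()) {
+x = torch_empty(c(2, 3))
+torch_ones_like(x)
+}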
Orgqr
+| input | +(Tensor) the |
+
|---|---|
| input2 | +(Tensor) the |
+
Computes the orthogonal matrix Q of a QR factorization, from the (input, input2)
+tuple returned by torch_geqrf.
This directly calls the underlying LAPACK function ?orgqr.
+See LAPACK documentation for orgqr_ for further details.
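A small sketch combining this with torch_geqrf (random input, purely illustrative):
+if (torch_is_installed()) {
+a = torch_randn(c(3, 3))
+g = torch_geqrf(a)
+q = torch_orgqr(g[[1]], g[[2]])
+torch_mm(q$t(), q)$round() # should be (close to) the identity
+}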
Ormqr
+| input | +(Tensor) the |
+
|---|---|
| input2 | +(Tensor) the |
+
| input3 | +(Tensor) the matrix to be multiplied. |
+
Multiplies mat (given by input3) by the orthogonal Q matrix of the QR factorization
+formed by torch_geqrf that is represented by (a, tau) (given by (input, input2)).
This directly calls the underlying LAPACK function ?ormqr.
+See LAPACK documentation for ormqr_ for further details.
Pdist
+| input | +NA input tensor of shape \(N \times M\). |
+
|---|---|
| p | +NA p value for the p-norm distance to calculate between each vector pair \(\in [0, \infty]\). |
+
Computes the p-norm distance between every pair of row vectors in the input.
+This is identical to the upper triangular portion, excluding the diagonal, of
+torch_norm(input[:, None] - input, dim=2, p=p). This function will be faster
+if the rows are contiguous.
If input has shape \(N \times M\) then the output will have shape +\(\frac{1}{2} N (N - 1)\).
+This function is equivalent to scipy.spatial.distance.pdist(input, 'minkowski', p=p) if \(p \in (0, \infty)\). When \(p = 0\) it is
+equivalent to scipy.spatial.distance.pdist(input, 'hamming') * M.
+When \(p = \infty\), the closest scipy function is
+scipy.spatial.distance.pdist(xn, lambda x, y: np.abs(x - y).max()).
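A minimal illustrative call (4 row vectors of length 5, so the result has 4 * 3 / 2 = 6 entries):
+if (torch_is_installed()) {
+x = torch_randn(c(4, 5))
+torch_pdist(x)
+}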
Pinverse
+| input | +(Tensor) The input tensor of size \((*, m, n)\) where \(*\) is zero or more batch dimensions |
+
|---|---|
| rcond | +(float) A floating point value to determine the cutoff for small singular values. Default: 1e-15 |
+
This method is implemented using the Singular Value Decomposition. ++ +
The pseudo-inverse is not necessarily a continuous function in the elements of the matrix `[1]`_. +Therefore, derivatives are not always existent, and exist for a constant rank only `[2]`_. +However, this method is backprop-able due to the implementation by using SVD results, and +could be unstable. Double-backward will also be unstable due to the usage of SVD internally. +See `~torch.svd` for more details. ++ +
Calculates the pseudo-inverse (also known as the Moore-Penrose inverse) of a 2D tensor.
+Please look at Moore-Penrose inverse_ for more details
+if (torch_is_installed()) { + +input = torch_randn(c(3, 5)) +input +torch_pinverse(input) +# Batched pinverse example +a = torch_randn(c(2,6,3)) +b = torch_pinverse(a) +torch_matmul(b, a) +}
R/gen-namespace-docs.R, R/gen-namespace-examples.R
+ torch_pixel_shuffle.RdPixel_shuffle
+| input | +(Tensor) the input tensor |
+
|---|---|
| upscale_factor | +(int) factor to increase spatial resolution by |
+
Rearranges elements in a tensor of shape \((*, C \times r^2, H, W)\) to a +tensor of shape \((*, C, H \times r, W \times r)\).
+See ~torch.nn.PixelShuffle for details.
+if (torch_is_installed()) { + +input = torch_randn(c(1, 9, 4, 4)) +output = nnf_pixel_shuffle(input, 3) +print(output$size()) +}
Poisson
+| input | +(Tensor) the input tensor containing the rates of the Poisson distribution |
+
|---|---|
| generator | +( |
+
Returns a tensor of the same size as input with each element
+sampled from a Poisson distribution with rate parameter given by the corresponding
+element in input i.e.,
$$ + \mbox{out}_i \sim \mbox{Poisson}(\mbox{input}_i) +$$
+ ++if (torch_is_installed()) { + +rates = torch_rand(c(4, 4)) * 5 # rate parameter between 0 and 5 +torch_poisson(rates) +}
Polygamma
+| n | +(int) the order of the polygamma function |
+
|---|---|
| input | +(Tensor) the input tensor. |
+
| out | +(Tensor, optional) the output tensor. |
+
This function is not implemented for \eqn{n \geq 2}.
+
+
+ Computes the \(n^{th}\) derivative of the digamma function on input.
+\(n \geq 0\) is called the order of the polygamma function.
$$ + \psi^{(n)}(x) = \frac{d^{(n)}}{dx^{(n)}} \psi(x) +$$
+ ++
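A small usage sketch (order \(n = 1\), since higher orders are not implemented per the note above):
+if (torch_is_installed()) {
+torch_polygamma(1, torch_tensor(c(1, 0.5)))
+}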
Pow
+| input | +(Tensor) the input tensor. |
+
|---|---|
| exponent | +(float or tensor) the exponent value |
+
| out | +(Tensor, optional) the output tensor. |
+
| self | +(float) the scalar base value for the power operation |
+
Takes the power of each element in input with exponent and
+returns a tensor with the result.
exponent can be either a single float number or a Tensor
+with the same number of elements as input.
When exponent is a scalar value, the operation applied is:
$$
+ \mbox{out}_i = x_i^{\mbox{exponent}}
+$$
+When exponent is a tensor, the operation applied is:
$$
+ \mbox{out}_i = x_i^{\mbox{exponent}_i}
+$$
+When exponent is a tensor, the shapes of input
+and exponent must be broadcastable .
self is a scalar float value, and exponent is a tensor.
+The returned tensor out is of the same shape as exponent
The operation applied is:
+$$ + \mbox{out}_i = \mbox{self} ^ {\mbox{exponent}_i} +$$
+ ++if (torch_is_installed()) { + +a = torch_randn(c(4)) +a +torch_pow(a, 2) +exp = torch_arange(1., 5.) +a = torch_arange(1., 5.) +a +exp +torch_pow(a, exp) + + +exp = torch_arange(1., 5.) +base = 2 +torch_pow(base, exp) +}
Prod
+| input | +(Tensor) the input tensor. |
+
|---|---|
| dtype | +( |
+
| dim | +(int) the dimension to reduce. |
+
| keepdim | +(bool) whether the output tensor has |
+
Returns the product of all elements in the input tensor.
Returns the product of each row of the input tensor in the given
+dimension dim.
If keepdim is True, the output tensor is of the same size
+as input except in the dimension dim where it is of size 1.
+Otherwise, dim is squeezed (see torch_squeeze), resulting in
+the output tensor having 1 fewer dimension than input.
+if (torch_is_installed()) { + +a = torch_randn(c(1, 3)) +a +torch_prod(a) + + +a = torch_randn(c(4, 2)) +a +torch_prod(a, 1) +}
R/gen-namespace-docs.R, R/gen-namespace-examples.R
+ torch_promote_types.RdPromote_types
+| type1 | +( |
+
|---|---|
| type2 | +( |
+
Returns the torch_dtype with the smallest size and scalar kind that is
+not smaller nor of lower kind than either type1 or type2. See type promotion
+documentation for more information on the type
+promotion logic.
+if (torch_is_installed()) { + +torch_promote_types(torch_int32(), torch_float32()) +torch_promote_types(torch_uint8(), torch_long()) +}
Qr
+| input | +(Tensor) the input tensor of size \((*, m, n)\) where |
+
|---|---|
| some | +(bool, optional) Set to |
+
| out | +(tuple, optional) tuple of |
+
precision may be lost if the magnitudes of the elements of input
+are large
While it should always give you a valid decomposition, it may not +give you the same one across platforms - it will depend on your +LAPACK implementation.
+Computes the QR decomposition of a matrix or a batch of matrices input,
+and returns a namedtuple (Q, R) of tensors such that \(\mbox{input} = Q R\)
+with \(Q\) being an orthogonal matrix or batch of orthogonal matrices and
+\(R\) being an upper triangular matrix or batch of upper triangular matrices.
If some is True, then this function returns the thin (reduced) QR factorization.
+Otherwise, if some is False, this function returns the complete QR factorization.
+if (torch_is_installed()) { + +a = torch_tensor(matrix(c(12., -51, 4, 6, 167, -68, -4, 24, -41), ncol = 3, byrow = TRUE)) +out = torch_qr(a) +q = out[[1]] +r = out[[2]] +torch_mm(q, r)$round() +torch_mm(q$t(), q)$round() +}
Creates the corresponding Scheme object
+torch_per_channel_affine() + +torch_per_tensor_affine() + +torch_per_channel_symmetric() + +torch_per_tensor_symmetric()+ + + +
R/gen-namespace-docs.R, R/gen-namespace-examples.R
+ torch_quantize_per_channel.RdQuantize_per_channel
+| input | +(Tensor) float tensor to quantize |
+
|---|---|
| scales | +(Tensor) float 1D tensor of scales to use, size should match |
+
| zero_points | +(int) integer 1D tensor of offset to use, size should match |
+
| axis | +(int) dimension on which apply per-channel quantization |
+
| dtype | +( |
+
Converts a float tensor to per-channel quantized tensor with given scales and zero points.
+ ++if (torch_is_installed()) { +x = torch_tensor(matrix(c(-1.0, 0.0, 1.0, 2.0), ncol = 2, byrow = TRUE)) +torch_quantize_per_channel(x, torch_tensor(c(0.1, 0.01)), + torch_tensor(c(10L, 0L)), 0, torch_quint8()) +torch_quantize_per_channel(x, torch_tensor(c(0.1, 0.01)), + torch_tensor(c(10L, 0L)), 0, torch_quint8())$int_repr() +}
R/gen-namespace-docs.R, R/gen-namespace-examples.R
+ torch_quantize_per_tensor.RdQuantize_per_tensor
+| input | +(Tensor) float tensor to quantize |
+
|---|---|
| scale | +(float) scale to apply in quantization formula |
+
| zero_point | +(int) offset in integer value that maps to float zero |
+
| dtype | +( |
+
Converts a float tensor to quantized tensor with given scale and zero point.
+ ++if (torch_is_installed()) { +torch_quantize_per_tensor(torch_tensor(c(-1.0, 0.0, 1.0, 2.0)), 0.1, 10, torch_quint8()) +torch_quantize_per_tensor(torch_tensor(c(-1.0, 0.0, 1.0, 2.0)), 0.1, 10, torch_quint8())$int_repr() +}
Rand
+| size | +(int...) a sequence of integers defining the shape of the output tensor. Can be a variable number of arguments or a collection like a list or tuple. |
+
|---|---|
| out | +(Tensor, optional) the output tensor. |
+
| dtype | +( |
+
| layout | +( |
+
| device | +( |
+
| requires_grad | +(bool, optional) If autograd should record operations on the returned tensor. Default: |
+
Returns a tensor filled with random numbers from a uniform distribution +on the interval \([0, 1)\)
+The shape of the tensor is defined by the variable argument size.
+
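A minimal illustrative call (not part of the generated reference; values are random and will differ between runs):

if (torch_is_installed()) {
  torch_rand(4)
  torch_rand(c(2, 3))
}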
Rand_like
+| input | +(Tensor) the size of |
+
|---|---|
| dtype | +( |
+
| layout | +( |
+
| device | +( |
+
| requires_grad | +(bool, optional) If autograd should record operations on the returned tensor. Default: |
+
| memory_format | +( |
+
Returns a tensor with the same size as input that is filled with
+random numbers from a uniform distribution on the interval \([0, 1)\).
+torch_rand_like(input) is equivalent to
+torch_rand(input.size(), dtype=input.dtype, layout=input.layout, device=input.device).
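An illustrative sketch of the shape relationship (uses torch_empty() only to create an input of a given shape):

if (torch_is_installed()) {
  x <- torch_empty(c(2, 3))
  torch_rand_like(x)  # same shape and dtype as x, values drawn from U[0, 1)
}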
Randint
+| low | +(int, optional) Lowest integer to be drawn from the distribution. Default: 0. |
+
|---|---|
| high | +(int) One above the highest integer to be drawn from the distribution. |
+
| size | +(tuple) a tuple defining the shape of the output tensor. |
+
| generator | +( |
+
| out | +(Tensor, optional) the output tensor. |
+
| dtype | +( |
+
| layout | +( |
+
| device | +( |
+
| requires_grad | +(bool, optional) If autograd should record operations on the returned tensor. Default: |
+
+Returns a tensor filled with random integers generated uniformly
+between low (inclusive) and high (exclusive).
The shape of the tensor is defined by the variable argument size.
Note: with the global dtype default (torch_float32), this function returns
+a tensor with dtype torch_int64.
+if (torch_is_installed()) { + +torch_randint(3, 5, list(3)) +torch_randint(0, 10, size = list(2, 2)) +torch_randint(3, 10, list(2, 2)) +}
R/gen-namespace-docs.R, R/gen-namespace-examples.R
+ torch_randint_like.RdRandint_like
+| input | +(Tensor) the size of |
+
|---|---|
| low | +(int, optional) Lowest integer to be drawn from the distribution. Default: 0. |
+
| high | +(int) One above the highest integer to be drawn from the distribution. |
+
| dtype | +( |
+
| layout | +( |
+
| device | +( |
+
| requires_grad | +(bool, optional) If autograd should record operations on the returned tensor. Default: |
+
| memory_format | +( |
+
+Returns a tensor with the same shape as Tensor input filled with
+random integers generated uniformly between low (inclusive) and
+high (exclusive).
Note: with the global dtype default (torch_float32), this function returns
+a tensor with dtype torch_int64.
Randn
+| size | +(int...) a sequence of integers defining the shape of the output tensor. Can be a variable number of arguments or a collection like a list or tuple. |
+
|---|---|
| out | +(Tensor, optional) the output tensor. |
+
| dtype | +( |
+
| layout | +( |
+
| device | +( |
+
| requires_grad | +(bool, optional) If autograd should record operations on the returned tensor. Default: |
+
Returns a tensor filled with random numbers from a normal distribution
+with mean 0 and variance 1 (also called the standard normal
+distribution).
$$
+ \mbox{out}_{i} \sim \mathcal{N}(0, 1)
+$$
+The shape of the tensor is defined by the variable argument size.
+
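A minimal illustrative call (values are random and will differ between runs):

if (torch_is_installed()) {
  torch_randn(4)
  torch_randn(c(2, 3))
}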
Randn_like
+| input | +(Tensor) the size of |
+
|---|---|
| dtype | +( |
+
| layout | +( |
+
| device | +( |
+
| requires_grad | +(bool, optional) If autograd should record operations on the returned tensor. Default: |
+
| memory_format | +( |
+
Returns a tensor with the same size as input that is filled with
+random numbers from a normal distribution with mean 0 and variance 1.
+torch_randn_like(input) is equivalent to
+torch_randn(input.size(), dtype=input.dtype, layout=input.layout, device=input.device).
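An illustrative sketch of the shape relationship (torch_empty() is used only to create an input of a given shape):

if (torch_is_installed()) {
  x <- torch_empty(c(2, 3))
  torch_randn_like(x)  # same shape and dtype as x, standard-normal values
}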
Randperm
+| n | +(int) the upper bound (exclusive) |
+
|---|---|
| out | +(Tensor, optional) the output tensor. |
+
| dtype | +( |
+
| layout | +( |
+
| device | +( |
+
| requires_grad | +(bool, optional) If autograd should record operations on the returned tensor. Default: |
+
Returns a random permutation of integers from 0 to n - 1.
+
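A minimal illustrative call (the permutation is random):

if (torch_is_installed()) {
  torch_randperm(5)
}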
Range
+| start | +(float) the starting value for the set of points. Default: |
+
|---|---|
| end | +(float) the ending value for the set of points |
+
| step | +(float) the gap between each pair of adjacent points. Default: |
+
| out | +(Tensor, optional) the output tensor. |
+
| dtype | +( |
+
| layout | +( |
+
| device | +( |
+
| requires_grad | +(bool, optional) If autograd should record operations on the returned tensor. Default: |
+
Returns a 1-D tensor of size \(\left\lfloor \frac{\mbox{end} - \mbox{start}}{\mbox{step}} \right\rfloor + 1\)
+with values from start to end with step step. Step is
+the gap between two values in the tensor.
$$ + \mbox{out}_{i+1} = \mbox{out}_i + \mbox{step}. +$$
+This function is deprecated in favor of torch_arange.
+
Real
+| input | +(Tensor) the input tensor. |
+
|---|---|
| out | +(Tensor, optional) the output tensor. |
+
Returns the real part of the input tensor. If
+input is a real (non-complex) tensor, this function just
+returns it.
Not yet implemented for complex tensors.
+$$ + \mbox{out}_{i} = real(\mbox{input}_{i}) +$$
+ ++if (torch_is_installed()) { +if (FALSE) { +torch_real(torch_tensor(c(-1 + 1i, -2 + 2i, 3 - 3i))) +} +}
Reciprocal
+| input | +(Tensor) the input tensor. |
+
|---|---|
| out | +(Tensor, optional) the output tensor. |
+
Returns a new tensor with the reciprocal of the elements of input
$$ + \mbox{out}_{i} = \frac{1}{\mbox{input}_{i}} +$$
+ ++
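A small worked example (illustrative; printed formatting may differ):

if (torch_is_installed()) {
  a <- torch_tensor(c(0.5, 1, 2, 4))
  torch_reciprocal(a)  # 2, 1, 0.5, 0.25
}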
Creates the reduction object
+torch_reduction_sum() + +torch_reduction_mean() + +torch_reduction_none()+ + + +
Remainder
+| input | +(Tensor) the dividend |
+
|---|---|
| other | +(Tensor or float) the divisor that may be either a number or a Tensor of the same shape as the dividend |
+
| out | +(Tensor, optional) the output tensor. |
+
Computes the element-wise remainder of division.
+The dividend and divisor may contain both integer and floating point +numbers. The remainder has the same sign as the divisor.
+When other is a tensor, the shapes of input and
+other must be broadcastable.
+if (torch_is_installed()) { + +torch_remainder(torch_tensor(c(-3., -2, -1, 1, 2, 3)), 2) +torch_remainder(torch_tensor(c(1., 2, 3, 4, 5)), 1.5) +}
Renorm
+| input | +(Tensor) the input tensor. |
+
|---|---|
| p | +(float) the power for the norm computation |
+
| dim | +(int) the dimension to slice over to get the sub-tensors |
+
| maxnorm | +(float) the maximum norm to keep each sub-tensor under |
+
| out | +(Tensor, optional) the output tensor. |
+
If the norm of a row is lower than maxnorm, the row is unchanged
Returns a tensor where each sub-tensor of input along dimension
+dim is normalized such that the p-norm of the sub-tensor is lower
+than the value maxnorm
+if (torch_is_installed()) { +x = torch_ones(c(3, 3)) +x[2,]$fill_(2) +x[3,]$fill_(3) +x +torch_renorm(x, 1, 1, 5) +}
R/gen-namespace-docs.R, R/gen-namespace-examples.R
+ torch_repeat_interleave.RdRepeat_interleave
+| input | +(Tensor) the input tensor. |
+
|---|---|
| repeats | +(Tensor or int) The number of repetitions for each element. repeats is broadcasted to fit the shape of the given axis. |
+
| dim | +(int, optional) The dimension along which to repeat values. By default, use the flattened input array, and return a flat output array. |
+
Repeat elements of a tensor.
+This is different from `torch_Tensor.repeat` but similar to `numpy.repeat`.
If repeats is tensor([n1, n2, n3, ...]), then the output will be
+tensor([0, 0, ..., 1, 1, ..., 2, 2, ..., ...]) where 0 appears n1 times,
+1 appears n2 times, 2 appears n3 times, etc.
+if (torch_is_installed()) { +if (FALSE) { +x = torch_tensor(c(1, 2, 3)) +x$repeat_interleave(2) +y = torch_tensor(matrix(c(1, 2, 3, 4), ncol = 2, byrow=TRUE)) +torch_repeat_interleave(y, 2) +torch_repeat_interleave(y, 3, dim=1) +torch_repeat_interleave(y, torch_tensor(c(1, 2)), dim=1) +} +}
Reshape
+| input | +(Tensor) the tensor to be reshaped |
+
|---|---|
| shape | +(tuple of ints) the new shape |
+
Returns a tensor with the same data and number of elements as input,
+but with the specified shape. When possible, the returned tensor will be a view
+of input. Otherwise, it will be a copy. Contiguous inputs and inputs
+with compatible strides can be reshaped without copying, but you should not
+depend on the copying vs. viewing behavior.
See torch_Tensor.view on when it is possible to return a view.
A single dimension may be -1, in which case it's inferred from the remaining
+dimensions and the number of elements in input.
+if (torch_is_installed()) { + +a = torch_arange(0, 4) +torch_reshape(a, list(2, 2)) +b = torch_tensor(matrix(c(0, 1, 2, 3), ncol = 2, byrow=TRUE)) +torch_reshape(b, list(-1)) +}
Result_type
+| tensor1 | +(Tensor or Number) an input tensor or number |
+
|---|---|
| tensor2 | +(Tensor or Number) an input tensor or number |
+
Returns the torch_dtype that would result from performing an arithmetic
+operation on the provided input tensors. See type promotion documentation
+for more information on the type promotion logic.
+if (torch_is_installed()) { + +torch_result_type(tensor = torch_tensor(c(1, 2), dtype=torch_int()), 1.0) +}
Rfft
+| input | +(Tensor) the input tensor of at least |
+
|---|---|
| signal_ndim | +(int) the number of dimensions in each signal. |
+
| normalized | +(bool, optional) controls whether to return normalized results. Default: |
+
| onesided | +(bool, optional) controls whether to return half of results to avoid redundancy. Default: |
+
For CUDA tensors, an LRU cache is used for cuFFT plans to speed up +repeatedly running FFT methods on tensors of same geometry with same +configuration. See cufft-plan-cache for more details on how to +monitor and control the cache. ++ +
Real-to-complex Discrete Fourier Transform
+This method computes the real-to-complex discrete Fourier transform. It is
+mathematically equivalent to torch_fft, with differences only in
+formats of the input and output.
This method supports 1D, 2D and 3D real-to-complex transforms, indicated
+by signal_ndim. input must be a tensor with at least
+signal_ndim dimensions with optionally arbitrary number of leading batch
+dimensions. If normalized is set to True, this normalizes the result
+by dividing it with \(\sqrt{\prod_{i=1}^K N_i}\) so that the operator is
+unitary, where \(N_i\) is the size of signal dimension \(i\).
The real-to-complex Fourier transform results follow conjugate symmetry:
+$$
+ X[\omega_1, \dots, \omega_d] = X^*[N_1 - \omega_1, \dots, N_d - \omega_d],
+$$
+where the index arithmetic is computed modulo the size of the corresponding
+dimension, \(\ ^*\) is the conjugate operator, and
+\(d\) = signal_ndim. The onesided flag controls whether to avoid
+redundancy in the output results. If set to True (default), the output will
+not be the full complex result of shape \((*, 2)\), where \(*\) is the shape
+of input; instead, the last dimension will be halved to size
+\(\lfloor \frac{N_d}{2} \rfloor + 1\).
The inverse of this function is torch_irfft.
For CPU tensors, this method is currently only available with MKL. Use
+torch_backends.mkl.is_available to check if MKL is installed.
+if (torch_is_installed()) { + +x = torch_randn(c(5, 5)) +torch_rfft(x, 2) +torch_rfft(x, 2, onesided=FALSE) +}
Roll
+| input | +(Tensor) the input tensor. |
+
|---|---|
| shifts | +(int or tuple of ints) The number of places by which the elements of the tensor are shifted. If shifts is a tuple, dims must be a tuple of the same size, and each dimension will be rolled by the corresponding value |
+
| dims | +(int or tuple of ints) Axis along which to roll |
+
Roll the tensor along the given dimension(s). Elements that are shifted beyond the +last position are re-introduced at the first position. If a dimension is not +specified, the tensor will be flattened before rolling and then restored +to the original shape.
+ ++if (torch_is_installed()) { + +x = torch_tensor(c(1, 2, 3, 4, 5, 6, 7, 8))$view(c(4, 2)) +x +torch_roll(x, 1, 1) +torch_roll(x, -1, 1) +torch_roll(x, shifts=list(2, 1), dims=list(1, 2)) +}
Rot90
+| input | +(Tensor) the input tensor. |
+
|---|---|
| k | +(int) number of times to rotate |
+
| dims | +(a list or tuple) axis to rotate |
+
Rotate an n-D tensor by 90 degrees in the plane specified by the dims axes. +Rotation direction is from the first towards the second axis if k > 0, and from the second towards the first for k < 0.
+ ++if (torch_is_installed()) { + +x = torch_arange(0, 4)$view(c(2, 2)) +x +torch_rot90(x, 1, c(1, 2)) +x = torch_arange(0, 8)$view(c(2, 2, 2)) +x +torch_rot90(x, 1, c(1, 2)) +}
Rsqrt
+| input | +(Tensor) the input tensor. |
+
|---|---|
| out | +(Tensor, optional) the output tensor. |
+
Returns a new tensor with the reciprocal of the square-root of each of
+the elements of input.
$$ + \mbox{out}_{i} = \frac{1}{\sqrt{\mbox{input}_{i}}} +$$
+ ++
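For concreteness, a tiny illustrative call:

if (torch_is_installed()) {
  a <- torch_tensor(c(1, 4, 16))
  torch_rsqrt(a)  # 1, 0.5, 0.25
}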
This function is experimental; don't use it for long-term storage.
+torch_save(obj, path, ...)+ +
| obj | +the saved object |
+
|---|---|
| path | +a connection or the name of the file to save. |
+
| ... | +not currently used. |
+
Other torch_save:
+torch_load()
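A round-trip sketch using torch_load() on a temporary file (illustrative; the file path and extension are assumptions made for the example):

if (torch_is_installed()) {
  x <- torch_randn(c(2, 2))
  path <- tempfile(fileext = ".pt")
  torch_save(x, path)
  y <- torch_load(path)
  torch_allclose(x, y)
}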
Sigmoid
+| input | +(Tensor) the input tensor. |
+
|---|---|
| out | +(Tensor, optional) the output tensor. |
+
Returns a new tensor with the sigmoid of the elements of input.
$$ + \mbox{out}_{i} = \frac{1}{1 + e^{-\mbox{input}_{i}}} +$$
+ ++
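A small illustrative call:

if (torch_is_installed()) {
  a <- torch_tensor(c(-1, 0, 1))
  torch_sigmoid(a)  # values in (0, 1); sigmoid(0) is 0.5
}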
Sign
+| input | +(Tensor) the input tensor. |
+
|---|---|
| out | +(Tensor, optional) the output tensor. |
+
Returns a new tensor with the signs of the elements of input.
$$ + \mbox{out}_{i} = \mbox{sgn}(\mbox{input}_{i}) +$$
+ ++
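A brief illustrative call:

if (torch_is_installed()) {
  a <- torch_tensor(c(-2.5, 0, 3))
  torch_sign(a)  # -1, 0, 1
}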
Sinh
+| input | +(Tensor) the input tensor. |
+
|---|---|
| out | +(Tensor, optional) the output tensor. |
+
Returns a new tensor with the hyperbolic sine of the elements of
+input.
$$ + \mbox{out}_{i} = \sinh(\mbox{input}_{i}) +$$
+ ++
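A tiny illustrative call:

if (torch_is_installed()) {
  a <- torch_tensor(c(-1, 0, 1))
  torch_sinh(a)  # sinh(0) is 0; the function is odd
}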
Slogdet
+| input | +(Tensor) the input tensor of size |
+
|---|
If input has zero determinant, this returns (0, -inf).
Backward through slogdet internally uses SVD results when input +is not invertible. In this case, double backward through slogdet +will be unstable when input doesn't have distinct singular values. +See torch_svd for details.
Calculates the sign and log absolute value of the determinant(s) of a square matrix or batches of square matrices.
+ ++if (torch_is_installed()) { + +A = torch_randn(c(3, 3)) +A +torch_det(A) +torch_logdet(A) +torch_slogdet(A) +}
Solve
+| input | +(Tensor) input matrix \(B\) of size \((*, m, k)\) , where \(*\) is zero or more batch dimensions. |
+
|---|---|
| A | +(Tensor) input square matrix of size \((*, m, m)\), where \(*\) is zero or more batch dimensions. |
+
| out | +((Tensor, Tensor), optional) the output tuple. |
+
Irrespective of the original strides, the returned matrices +`solution` and `LU` will be transposed, i.e. with strides like +`B.contiguous().transpose(-1, -2).stride()` and +`A.contiguous().transpose(-1, -2).stride()` respectively. ++ +
This function returns the solution to the system of linear
+equations represented by \(AX = B\) and the LU factorization of
+A, in order as a namedtuple solution, LU.
LU contains L and U factors for LU factorization of A.
torch_solve(B, A) can take in 2D inputs B, A or inputs that are
+batches of 2D matrices. If the inputs are batches, then returns
+batched outputs solution, LU.
+if (torch_is_installed()) { + +A = torch_tensor(rbind(c(6.80, -2.11, 5.66, 5.97, 8.23), + c(-6.05, -3.30, 5.36, -4.44, 1.08), + c(-0.45, 2.58, -2.70, 0.27, 9.04), + c(8.32, 2.71, 4.35, -7.17, 2.14), + c(-9.67, -5.14, -7.26, 6.08, -6.87)))$t() +B = torch_tensor(rbind(c(4.02, 6.19, -8.22, -7.57, -3.03), + c(-1.56, 4.00, -8.67, 1.75, 2.86), + c(9.81, -4.09, -4.57, -8.61, 8.99)))$t() +out = torch_solve(B, A) +X = out[[1]] +LU = out[[2]] +torch_dist(B, torch_mm(A, X)) +# Batched solver example +A = torch_randn(c(2, 3, 1, 4, 4)) +B = torch_randn(c(2, 3, 1, 4, 6)) +out = torch_solve(B, A) +X = out[[1]] +LU = out[[2]] +torch_dist(B, A$matmul(X)) +}
Sort
+| input | +(Tensor) the input tensor. |
+
|---|---|
| dim | +(int, optional) the dimension to sort along |
+
| descending | +(bool, optional) controls the sorting order (ascending or descending) |
+
| out | +(tuple, optional) the output tuple of ( |
+
Sorts the elements of the input tensor along a given dimension
+in ascending order by value.
If dim is not given, the last dimension of the input is chosen.
If descending is True then the elements are sorted in descending
+order by value.
A namedtuple of (values, indices) is returned, where the values are the
+sorted values and indices are the indices of the elements in the original
+input tensor.
+if (torch_is_installed()) { + +x = torch_randn(c(3, 4)) +out = torch_sort(x) +out +out = torch_sort(x, 1) +out +}
R/gen-namespace-docs.R, R/gen-namespace-examples.R
+ torch_sparse_coo_tensor.RdSparse_coo_tensor
+| indices | +(array_like) Initial data for the tensor. Can be a list, tuple, NumPy |
+
|---|---|
| values | +(array_like) Initial values for the tensor. Can be a list, tuple, NumPy |
+
| size | +(list, tuple, or |
+
| dtype | +( |
+
| device | +( |
+
| requires_grad | +(bool, optional) If autograd should record operations on the returned tensor. Default: |
+
Constructs a sparse tensor in COO(rdinate) format with non-zero elements at the given indices
+with the given values. A sparse tensor can be uncoalesced; in that case, there are duplicate
+coordinates in the indices, and the value at that index is the sum of all duplicate value entries.
+if (torch_is_installed()) { + +i = torch_tensor(matrix(c(1, 2, 2, 3, 1, 3), ncol = 3, byrow = TRUE), dtype=torch_int64()) +v = torch_tensor(c(3, 4, 5), dtype=torch_float32()) +torch_sparse_coo_tensor(i, v) +torch_sparse_coo_tensor(i, v, c(2, 4)) + +# create empty sparse tensors +S = torch_sparse_coo_tensor( + torch_empty(c(1, 0), dtype = torch_int64()), + torch_tensor(numeric(), dtype = torch_float32()), + c(1) +) +S = torch_sparse_coo_tensor( + torch_empty(c(1, 0), dtype = torch_int64()), + torch_empty(c(0, 2)), + c(1, 2) +) +}
Split
+| tensor | +(Tensor) tensor to split. |
+
|---|---|
| split_size_or_sections | +(int) size of a single chunk or list of sizes for each chunk |
+
| dim | +(int) dimension along which to split the tensor. |
+
Splits the tensor into chunks. Each chunk is a view of the original tensor.
If split_size_or_sections is an integer type, then tensor will
be split into equally sized chunks (if possible). The last chunk will be smaller if
the tensor size along the given dimension dim is not divisible by
split_size.

If split_size_or_sections is a list, then tensor will be split
into length(split_size_or_sections) chunks with sizes in dim according
to split_size_or_sections.
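An illustrative sketch (dim is passed explicitly and is 1-based, as in the other examples in these docs):

if (torch_is_installed()) {
  x <- torch_randn(c(5, 2))
  # split into chunks of (up to) 2 rows along the first dimension: 2 + 2 + 1 rows
  torch_split(x, 2, dim = 1)
}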
Sqrt
+| input | +(Tensor) the input tensor. |
+
|---|---|
| out | +(Tensor, optional) the output tensor. |
+
Returns a new tensor with the square-root of the elements of input.
$$ + \mbox{out}_{i} = \sqrt{\mbox{input}_{i}} +$$
+ ++
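A short illustrative call:

if (torch_is_installed()) {
  a <- torch_tensor(c(1, 4, 9))
  torch_sqrt(a)  # 1, 2, 3
}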
Squeeze
+| input | +(Tensor) the input tensor. |
+
|---|---|
| dim | +(int, optional) if given, the input will be squeezed only in this dimension |
+
| out | +(Tensor, optional) the output tensor. |
+
The returned tensor shares the storage with the input tensor, +so changing the contents of one will change the contents of the other.
+Returns a tensor with all the dimensions of input of size 1 removed.
For example, if input is of shape:
+\((A \times 1 \times B \times C \times 1 \times D)\) then the out tensor
+will be of shape: \((A \times B \times C \times D)\).
When dim is given, a squeeze operation is done only in the given
+dimension. If input is of shape: \((A \times 1 \times B)\),
+squeeze(input, 0) leaves the tensor unchanged, but squeeze(input, 1)
+will squeeze the tensor to the shape \((A \times B)\).
+if (torch_is_installed()) { + +x = torch_zeros(c(2, 1, 2, 1, 2)) +x +y = torch_squeeze(x) +y +y = torch_squeeze(x, 1) +y +y = torch_squeeze(x, 2) +y +}
Stack
+| tensors | +(sequence of Tensors) sequence of tensors to concatenate |
+
|---|---|
| dim | +(int) dimension to insert. Has to be between 0 and the number of dimensions of concatenated tensors (inclusive) |
+
| out | +(Tensor, optional) the output tensor. |
+
Concatenates sequence of tensors along a new dimension.
+All tensors need to be of the same size.
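An illustrative sketch of stacking two 1-D tensors along a new first dimension:

if (torch_is_installed()) {
  a <- torch_tensor(c(1, 2))
  b <- torch_tensor(c(3, 4))
  torch_stack(list(a, b), dim = 1)  # shape (2, 2); rows are a and b
}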
+ +Std
+| input | +(Tensor) the input tensor. |
+
|---|---|
| unbiased | +(bool) whether to use the unbiased estimation or not |
+
| dim | +(int or tuple of ints) the dimension or dimensions to reduce. |
+
| keepdim | +(bool) whether the output tensor has |
+
| out | +(Tensor, optional) the output tensor. |
+
Returns the standard-deviation of all elements in the input tensor.
If unbiased is False, then the standard-deviation will be calculated
+via the biased estimator. Otherwise, Bessel's correction will be used.
Returns the standard-deviation of each row of the input tensor in the
+dimension dim. If dim is a list of dimensions,
+reduce over all of them.
If keepdim is True, the output tensor is of the same size
+as input except in the dimension(s) dim where it is of size 1.
+Otherwise, dim is squeezed (see torch_squeeze), resulting in the
+output tensor having 1 (or len(dim)) fewer dimension(s).
If unbiased is False, then the standard-deviation will be calculated
+via the biased estimator. Otherwise, Bessel's correction will be used.
+if (torch_is_installed()) { + +a = torch_randn(c(1, 3)) +a +torch_std(a) + + +a = torch_randn(c(4, 4)) +a +torch_std(a, dim=1) +}
Std_mean
+| input | +(Tensor) the input tensor. |
+
|---|---|
| unbiased | +(bool) whether to use the unbiased estimation or not |
+
| dim | +(int or tuple of ints) the dimension or dimensions to reduce. |
+
| keepdim | +(bool) whether the output tensor has |
+
Returns the standard-deviation and mean of all elements in the input tensor.
If unbiased is False, then the standard-deviation will be calculated
+via the biased estimator. Otherwise, Bessel's correction will be used.
Returns the standard-deviation and mean of each row of the input tensor in the
+dimension dim. If dim is a list of dimensions,
+reduce over all of them.
If keepdim is True, the output tensor is of the same size
+as input except in the dimension(s) dim where it is of size 1.
+Otherwise, dim is squeezed (see torch_squeeze), resulting in the
+output tensor having 1 (or len(dim)) fewer dimension(s).
If unbiased is False, then the standard-deviation will be calculated
+via the biased estimator. Otherwise, Bessel's correction will be used.
+if (torch_is_installed()) { + +a = torch_randn(c(1, 3)) +a +torch_std_mean(a) + + +a = torch_randn(c(4, 4)) +a +torch_std_mean(a, 1) +}
Stft
+| input | +(Tensor) the input tensor |
+
|---|---|
| n_fft | +(int) size of Fourier transform |
+
| hop_length | +(int, optional) the distance between neighboring sliding window frames. Default: |
+
| win_length | +(int, optional) the size of window frame and STFT filter. Default: |
+
| window | +(Tensor, optional) the optional window function. Default: |
+
| center | +(bool, optional) whether to pad |
+
| pad_mode | +(string, optional) controls the padding method used when |
+
| normalized | +(bool, optional) controls whether to return the normalized STFT results Default: |
+
| onesided | +(bool, optional) controls whether to return half of results to avoid redundancy Default: |
+
Short-time Fourier transform (STFT).
Ignoring the optional batch dimension, this method computes the following +expression: ++ +
$$
+ X[m, \omega] = \sum_{k = 0}^{\mbox{win\_length-1}}%
+ \mbox{window}[k]\ \mbox{input}[m \times \mbox{hop\_length} + k]\ %
+ \exp\left(- j \frac{2 \pi \cdot \omega k}{\mbox{win\_length}}\right),
+$$
+where \(m\) is the index of the sliding window, and \(\omega\) is
+the frequency, where \(0 \leq \omega < \mbox{n\_fft}\). When
+onesided has the default value True, only the non-redundant half of the
+frequencies is returned (see the notes below).
* input must be either a 1-D time sequence or a 2-D batch of time sequences.

* If hop_length is None (default), it is treated as equal to floor(n_fft / 4).

* If win_length is None (default), it is treated as equal to n_fft.

* window can be a 1-D tensor of size win_length, e.g., from torch_hann_window.
  If window is None (default), it is treated as if having 1 everywhere in the
  window. If \(\mbox{win\_length} < \mbox{n\_fft}\), window will be padded on
  both sides to length n_fft before being applied.

* If center is True (default), input will be padded on both sides so that the
  \(t\)-th frame is centered at time \(t \times \mbox{hop\_length}\). Otherwise,
  the \(t\)-th frame begins at time \(t \times \mbox{hop\_length}\).

* pad_mode determines the padding method used on input when center is True.
  See torch_nn.functional.pad for all available options. Default is "reflect".

* If onesided is True (default), only values for \(\omega\) in
  \(\left[0, 1, 2, \dots, \left\lfloor \frac{\mbox{n\_fft}}{2} \right\rfloor + 1\right]\)
  are returned because the real-to-complex Fourier transform satisfies the
  conjugate symmetry, i.e., \(X[m, \omega] = X[m, \mbox{n\_fft} - \omega]^*\).

* If normalized is True (default is False), the function returns the normalized
  STFT results, i.e., multiplied by \((\mbox{frame\_length})^{-0.5}\).

Returns the real and the imaginary parts together as one tensor of size
\((* \times N \times T \times 2)\), where \(*\) is the optional batch size of
input, \(N\) is the number of frequencies where STFT is applied, \(T\) is the
total number of frames used, and each pair in the last dimension represents a
complex number as the real part and the imaginary part.

Warning: this function changed signature at version 0.4.1. Calling with the
previous signature may cause error or return incorrect result.
+
+
+
+ Sum
+| input | +(Tensor) the input tensor. |
+
|---|---|
| dtype | +( |
+
| dim | +(int or tuple of ints) the dimension or dimensions to reduce. |
+
| keepdim | +(bool) whether the output tensor has |
+
Returns the sum of all elements in the input tensor.
Returns the sum of each row of the input tensor in the given
+dimension dim. If dim is a list of dimensions,
+reduce over all of them.
If keepdim is True, the output tensor is of the same size
+as input except in the dimension(s) dim where it is of size 1.
+Otherwise, dim is squeezed (see torch_squeeze), resulting in the
+output tensor having 1 (or len(dim)) fewer dimension(s).
+if (torch_is_installed()) { + +a = torch_randn(c(1, 3)) +a +torch_sum(a) + + +a = torch_randn(c(4, 4)) +a +torch_sum(a, 1) +b = torch_arange(0, 4 * 5 * 6)$view(c(4, 5, 6)) +torch_sum(b, list(2, 1)) +}
Svd
+| input | +(Tensor) the input tensor of size \((*, m, n)\) where |
+
|---|---|
| some | +(bool, optional) controls the shape of returned |
+
| compute_uv | +(bool, optional) option whether to compute |
+
| out | +(tuple, optional) the output tuple of tensors |
+
The singular values are returned in descending order. If input is a batch of matrices,
+then the singular values of each matrix in the batch are returned in descending order.
The implementation of SVD on CPU uses the LAPACK routine ?gesdd (a divide-and-conquer
+algorithm) instead of ?gesvd for speed. Analogously, the SVD on GPU uses the MAGMA routine
+gesdd as well.
Irrespective of the original strides, the returned matrix U
+will be transposed, i.e. with strides U.contiguous().transpose(-2, -1).stride()
Extra care needs to be taken when backward through U and V
+outputs. Such operation is really only stable when input is
+full rank with all distinct singular values. Otherwise, NaN can
+appear as the gradients are not properly defined. Also, notice that
+double backward will usually do an additional backward through U and
+V even if the original backward is only on S.
When some = False, the gradients on U[..., :, min(m, n):]
+and V[..., :, min(m, n):] will be ignored in backward as those vectors
+can be arbitrary bases of the subspaces.
When compute_uv = False, backward cannot be performed since U and V
+from the forward pass are required for the backward operation.
This function returns a namedtuple (U, S, V) which is the singular value
+decomposition of an input real matrix or batches of real matrices input such that
+\(input = U \times diag(S) \times V^T\).
If some is True (default), the method returns the reduced singular value decomposition
+i.e., if the last two dimensions of input are m and n, then the returned
+U and V matrices will contain only \(min(n, m)\) orthonormal columns.
If compute_uv is False, the returned U and V matrices will be zero matrices
+of shape \((m \times m)\) and \((n \times n)\) respectively. some will be ignored here.
+if (torch_is_installed()) { + +a = torch_randn(c(5, 3)) +a +out = torch_svd(a) +u = out[[1]] +s = out[[2]] +v = out[[3]] +torch_dist(a, torch_mm(torch_mm(u, torch_diag(s)), v$t())) +a_big = torch_randn(c(7, 5, 3)) +out = torch_svd(a_big) +u = out[[1]] +s = out[[2]] +v = out[[3]] +torch_dist(a_big, torch_matmul(torch_matmul(u, torch_diag_embed(s)), v$transpose(-2, -1))) +}
Symeig
+| input | +(Tensor) the input tensor of size \((*, n, n)\) where |
+
|---|---|
| eigenvectors | +(boolean, optional) controls whether eigenvectors have to be computed |
+
| upper | +(boolean, optional) controls whether to consider upper-triangular or lower-triangular region |
+
| out | +(tuple, optional) the output tuple of (Tensor, Tensor) |
+
The eigenvalues are returned in ascending order. If input is a batch of matrices,
+then the eigenvalues of each matrix in the batch are returned in ascending order.
Irrespective of the original strides, the returned matrix V will
+be transposed, i.e. with strides V.contiguous().transpose(-1, -2).stride().
Extra care needs to be taken when backward through outputs. Such
+operation is really only stable when all eigenvalues are distinct.
+Otherwise, NaN can appear as the gradients are not properly defined.
This function returns eigenvalues and eigenvectors
+of a real symmetric matrix input or a batch of real symmetric matrices,
+represented by a namedtuple (eigenvalues, eigenvectors).
This function calculates all eigenvalues (and vectors) of input
+such that \(\mbox{input} = V \mbox{diag}(e) V^T\).
The boolean argument eigenvectors defines computation of
+both eigenvectors and eigenvalues or eigenvalues only.
If it is False, only eigenvalues are computed. If it is True,
+both eigenvalues and eigenvectors are computed.
Since the input matrix input is supposed to be symmetric,
+only the upper triangular portion is used by default.
If upper is False, then the lower triangular portion is used.
+if (torch_is_installed()) { + +a = torch_randn(c(5, 5)) +a = a + a$t() # To make a symmetric +a +o = torch_symeig(a, eigenvectors=TRUE) +e = o[[1]] +v = o[[2]] +e +v +a_big = torch_randn(c(5, 2, 2)) +a_big = a_big + a_big$transpose(-2, -1) # To make a_big symmetric +o = a_big$symeig(eigenvectors=TRUE) +e = o[[1]] +v = o[[2]] +torch_allclose(torch_matmul(v, torch_matmul(e$diag_embed(), v$transpose(-2, -1))), a_big) +}
T
+| input | +(Tensor) the input tensor. |
+
|---|
Expects input to be <= 2-D tensor and transposes dimensions 0
+and 1.
0-D and 1-D tensors are returned as is. When input is a 2-D tensor this
+is equivalent to transpose(input, 0, 1).
+if (torch_is_installed()) { + +x = torch_randn(c(2,3)) +x +torch_t(x) +x = torch_randn(c(3)) +x +torch_t(x) +x = torch_randn(c(2, 3)) +x +torch_t(x) +}
Take
+| input | +(Tensor) the input tensor. |
+
|---|---|
| indices | +(LongTensor) the indices into tensor |
+
Returns a new tensor with the elements of input at the given indices.
+The input tensor is treated as if it were viewed as a 1-D tensor. The result
+takes the same shape as the indices.
+if (torch_is_installed()) { + +src = torch_tensor(matrix(c(4,3,5,6,7,8), ncol = 3, byrow = TRUE)) +torch_take(src, torch_tensor(c(1, 2, 5), dtype = torch_int64())) +}
Tan
+| input | +(Tensor) the input tensor. |
+
|---|---|
| out | +(Tensor, optional) the output tensor. |
+
Returns a new tensor with the tangent of the elements of input.
$$ + \mbox{out}_{i} = \tan(\mbox{input}_{i}) +$$
+ ++
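A brief illustrative call:

if (torch_is_installed()) {
  a <- torch_tensor(c(0, pi / 4))
  torch_tan(a)  # approximately 0 and 1
}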
Tanh
+| input | +(Tensor) the input tensor. |
+
|---|---|
| out | +(Tensor, optional) the output tensor. |
+
Returns a new tensor with the hyperbolic tangent of the elements
+of input.
$$ + \mbox{out}_{i} = \tanh(\mbox{input}_{i}) +$$
+ ++
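A short illustrative call:

if (torch_is_installed()) {
  a <- torch_tensor(c(-1, 0, 1))
  torch_tanh(a)  # values in (-1, 1); tanh(0) is 0
}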
Converts R objects to a torch tensor
+torch_tensor( + data, + dtype = NULL, + device = NULL, + requires_grad = FALSE, + pin_memory = FALSE +)+ +
| data | +an R atomic vector, matrix or array |
+
|---|---|
| dtype | +a torch_dtype instance |
+
| device | +a device creted with |
+
| requires_grad | +if autograd should record operations on the returned tensor. |
+
| pin_memory | +If set, returned tensor would be allocated in the pinned memory. |
+
+if (torch_is_installed()) { +torch_tensor(c(1,2,3,4)) +torch_tensor(c(1,2,3,4), dtype = torch_int()) + +}
Tensordot
+| a | +(Tensor) Left tensor to contract |
+
|---|---|
| b | +(Tensor) Right tensor to contract |
+
| dims | +(int or tuple of two lists of integers) number of dimensions to contract or explicit lists of dimensions for |
+
Returns a contraction of a and b over multiple dimensions.
`tensordot` implements a generalized matrix product. ++ + +
+if (torch_is_installed()) { + +a = torch_arange(start = 0, end = 60.)$reshape(c(3, 4, 5)) +b = torch_arange(start = 0, end = 24.)$reshape(c(4, 3, 2)) +torch_tensordot(a, b, dims_self=c(2, 1), dims_other = c(1, 2)) +if (FALSE) { +a = torch_randn(3, 4, 5, device='cuda') +b = torch_randn(4, 5, 6, device='cuda') +c = torch_tensordot(a, b, dims=2)$cpu() +} +}
Topk
+| input | +(Tensor) the input tensor. |
+
|---|---|
| k | +(int) the k in "top-k" |
+
| dim | +(int, optional) the dimension to sort along |
+
| largest | +(bool, optional) controls whether to return largest or smallest elements |
+
| sorted | +(bool, optional) controls whether to return the elements in sorted order |
+
| out | +(tuple, optional) the output tuple of (Tensor, LongTensor) that can be optionally given to be used as output buffers |
+
Returns the k largest elements of the given input tensor along
+a given dimension.
If dim is not given, the last dimension of the input is chosen.
If largest is False then the k smallest elements are returned.
A namedtuple of (values, indices) is returned, where the indices are the indices
+of the elements in the original input tensor.
The boolean option sorted, if True, will make sure that the returned
+k elements are themselves sorted.
+
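An illustrative sketch (the result is a list holding the values and their indices):

if (torch_is_installed()) {
  x <- torch_tensor(c(1, 5, 3, 2, 4))
  out <- torch_topk(x, 3)
  out[[1]]  # the three largest values: 5, 4, 3
  out[[2]]  # their positions in x
}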
Transpose
+| input | +(Tensor) the input tensor. |
+
|---|---|
| dim0 | +(int) the first dimension to be transposed |
+
| dim1 | +(int) the second dimension to be transposed |
+
Returns a tensor that is a transposed version of input.
+The given dimensions dim0 and dim1 are swapped.
The resulting out tensor shares its underlying storage with the
+input tensor, so changing the content of one would change the content
+of the other.
+
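An illustrative sketch (dimensions are 1-based, as in the other examples in these docs):

if (torch_is_installed()) {
  x <- torch_randn(c(2, 3))
  torch_transpose(x, 1, 2)  # shape becomes (3, 2)
}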
Trapz
+| y | +(Tensor) The values of the function to integrate |
+
|---|---|
| x | +(Tensor) The points at which the function |
+
| dim | +(int) The dimension along which to integrate. By default, use the last dimension. |
+
| dx | +(float) The distance between points at which |
+
Estimate \(\int y\,dx\) along dim, using the trapezoid rule.
As above, but the sample points are spaced uniformly at a distance of dx.
+if (torch_is_installed()) { + +y = torch_randn(list(2, 3)) +y +x = torch_tensor(matrix(c(1, 3, 4, 1, 2, 3), ncol = 3, byrow=TRUE)) +torch_trapz(y, x = x) + +}
R/gen-namespace-docs.R, R/gen-namespace-examples.R
+ torch_triangular_solve.RdTriangular_solve
+| input | +(Tensor) multiple right-hand sides of size \((*, m, k)\) where \(*\) is zero of more batch dimensions (\(b\)) |
+
|---|---|
| A | +(Tensor) the input triangular coefficient matrix of size \((*, m, m)\) where \(*\) is zero or more batch dimensions |
+
| upper | +(bool, optional) whether to solve the upper-triangular system of equations (default) or the lower-triangular system of equations. Default: |
+
| transpose | +(bool, optional) whether \(A\) should be transposed before being sent into the solver. Default: |
+
| unitriangular | +(bool, optional) whether \(A\) is unit triangular. If True, the diagonal elements of \(A\) are assumed to be 1 and not referenced from \(A\). Default: |
+
Solves a system of equations with a triangular coefficient matrix \(A\) +and multiple right-hand sides \(b\).
+In particular, solves \(AX = b\) and assumes \(A\) is upper-triangular +with the default keyword arguments.
+torch_triangular_solve(b, A) can take in 2D inputs b, A or inputs that are
+batches of 2D matrices. If the inputs are batches, then returns
+batched outputs X
+if (torch_is_installed()) { + +A = torch_randn(c(2, 2))$triu() +A +b = torch_randn(c(2, 3)) +b +torch_triangular_solve(b, A) +}
Tril
+| input | +(Tensor) the input tensor. |
+
|---|---|
| diagonal | +(int, optional) the diagonal to consider |
+
| out | +(Tensor, optional) the output tensor. |
+
Returns the lower triangular part of the matrix (2-D tensor) or batch of matrices
+input, the other elements of the result tensor out are set to 0.
The lower triangular part of the matrix is defined as the elements on and +below the diagonal.
+The argument diagonal controls which diagonal to consider. If
+diagonal = 0, all elements on and below the main diagonal are
+retained. A positive value includes just as many diagonals above the main
+diagonal, and similarly a negative value excludes just as many diagonals below
+the main diagonal. The main diagonal are the set of indices
+\(\lbrace (i, i) \rbrace\) for \(i \in [0, \min\{d_{1}, d_{2}\} - 1]\) where
+\(d_{1}, d_{2}\) are the dimensions of the matrix.
+if (torch_is_installed()) { + +a = torch_randn(c(3, 3)) +a +torch_tril(a) +b = torch_randn(c(4, 6)) +b +torch_tril(b, diagonal=1) +torch_tril(b, diagonal=-1) +}
R/gen-namespace-docs.R, R/gen-namespace-examples.R
+ torch_tril_indices.RdTril_indices
+| row | +( |
+
|---|---|
| col | +( |
+
| offset | +( |
+
| dtype | +( |
+
| device | +( |
+
| layout | +( |
+
When running on CUDA, row * col must be less than \(2^{59}\) to
+prevent overflow during calculation.
+
+
+ Returns the indices of the lower triangular part of a row-by-
+col matrix in a 2-by-N Tensor, where the first row contains row
+coordinates of all indices and the second row contains column coordinates.
+Indices are ordered based on rows and then columns.
The lower triangular part of the matrix is defined as the elements on and +below the diagonal.
+The argument offset controls which diagonal to consider. If
+offset = 0, all elements on and below the main diagonal are
+retained. A positive value includes just as many diagonals above the main
+diagonal, and similarly a negative value excludes just as many diagonals below
+the main diagonal. The main diagonal are the set of indices
+\(\lbrace (i, i) \rbrace\) for \(i \in [0, \min\{d_{1}, d_{2}\} - 1]\)
+where \(d_{1}, d_{2}\) are the dimensions of the matrix.
+if (torch_is_installed()) { +if (FALSE) { +a = torch_tril_indices(3, 3) +a +a = torch_tril_indices(4, 3, -1) +a +a = torch_tril_indices(4, 3, 1) +a +} +}
Triu
+| input | +(Tensor) the input tensor. |
+
|---|---|
| diagonal | +(int, optional) the diagonal to consider |
+
| out | +(Tensor, optional) the output tensor. |
+
Returns the upper triangular part of a matrix (2-D tensor) or batch of matrices
+input, the other elements of the result tensor out are set to 0.
The upper triangular part of the matrix is defined as the elements on and +above the diagonal.
+The argument diagonal controls which diagonal to consider. If
+diagonal = 0, all elements on and above the main diagonal are
+retained. A positive value excludes just as many diagonals above the main
+diagonal, and similarly a negative value includes just as many diagonals below
+the main diagonal. The main diagonal are the set of indices
+\(\lbrace (i, i) \rbrace\) for \(i \in [0, \min\{d_{1}, d_{2}\} - 1]\) where
+\(d_{1}, d_{2}\) are the dimensions of the matrix.
+if (torch_is_installed()) { + +a = torch_randn(c(3, 3)) +a +torch_triu(a) +torch_triu(a, diagonal=1) +torch_triu(a, diagonal=-1) +b = torch_randn(c(4, 6)) +b +torch_triu(b, diagonal=1) +torch_triu(b, diagonal=-1) +}
R/gen-namespace-docs.R, R/gen-namespace-examples.R
+ torch_triu_indices.RdTriu_indices
+| row | +( |
+
|---|---|
| col | +( |
+
| offset | +( |
+
| dtype | +( |
+
| device | +( |
+
| layout | +( |
+
When running on CUDA, row * col must be less than \(2^{59}\) to
+prevent overflow during calculation.
+
+
+ Returns the indices of the upper triangular part of a row by
+col matrix in a 2-by-N Tensor, where the first row contains row
+coordinates of all indices and the second row contains column coordinates.
+Indices are ordered based on rows and then columns.
The upper triangular part of the matrix is defined as the elements on and +above the diagonal.
+The argument offset controls which diagonal to consider. If
+offset = 0, all elements on and above the main diagonal are
+retained. A positive value excludes just as many diagonals above the main
+diagonal, and similarly a negative value includes just as many diagonals below
+the main diagonal. The main diagonal are the set of indices
+\(\lbrace (i, i) \rbrace\) for \(i \in [0, \min\{d_{1}, d_{2}\} - 1]\)
+where \(d_{1}, d_{2}\) are the dimensions of the matrix.
+if (torch_is_installed()) { +if (FALSE) { +a = torch_triu_indices(3, 3) +a +a = torch_triu_indices(4, 3, -1) +a +a = torch_triu_indices(4, 3, 1) +a +} +}
True_divide
+| dividend | +(Tensor) the dividend |
+
|---|---|
| divisor | +(Tensor or Scalar) the divisor |
+
Performs "true division" that always computes the division
+in floating point. Analogous to division in Python 3 and equivalent to
+torch_div except when both inputs have bool or integer scalar types,
+in which case they are cast to the default (floating) scalar type before the division.
$$ + \mbox{out}_i = \frac{\mbox{dividend}_i}{\mbox{divisor}} +$$
+ ++if (torch_is_installed()) { + +dividend = torch_tensor(c(5, 3), dtype=torch_int()) +divisor = torch_tensor(c(3, 2), dtype=torch_int()) +torch_true_divide(dividend, divisor) +torch_true_divide(dividend, 2) +}
R/gen-namespace-docs.R, R/gen-namespace-examples.R
+ torch_unique_consecutive.RdUnique_consecutive
+| input | +(Tensor) the input tensor |
+
|---|---|
| return_inverse | +(bool) Whether to also return the indices for where elements in the original input ended up in the returned unique list. |
+
| return_counts | +(bool) Whether to also return the counts for each unique element. |
+
| dim | +(int) the dimension to apply unique. If |
+
Eliminates all but the first element from every consecutive group of equivalent elements.
Note: this function is different from torch_unique in the sense that this function only eliminates consecutive duplicate values. This semantics is similar to std::unique in C++.
+if (torch_is_installed()) { +x = torch_tensor(c(1, 1, 2, 2, 3, 1, 1, 2)) +output = torch_unique_consecutive(x) +output +torch_unique_consecutive(x, return_inverse=TRUE) +torch_unique_consecutive(x, return_counts=TRUE) +}
Unsqueeze
+| input | +(Tensor) the input tensor. |
+
|---|---|
| dim | +(int) the index at which to insert the singleton dimension |
+
Returns a new tensor with a dimension of size one inserted at the +specified position.
+The returned tensor shares the same underlying data with this tensor.
+A dim value within the range [-input.dim() - 1, input.dim() + 1)
+can be used. Negative dim will correspond to unsqueeze
+applied at dim = dim + input.dim() + 1.
+if (torch_is_installed()) { + +x = torch_tensor(c(1, 2, 3, 4)) +torch_unsqueeze(x, 1) +torch_unsqueeze(x, 2) +}
Var
+| input | +(Tensor) the input tensor. |
+
|---|---|
| unbiased | +(bool) whether to use the unbiased estimation or not |
+
| dim | +(int or tuple of ints) the dimension or dimensions to reduce. |
+
| keepdim | +(bool) whether the output tensor has |
+
| out | +(Tensor, optional) the output tensor. |
+
Returns the variance of all elements in the input tensor.
If unbiased is False, then the variance will be calculated via the
+biased estimator. Otherwise, Bessel's correction will be used.
Returns the variance of each row of the input tensor in the given
+dimension dim.
If keepdim is True, the output tensor is of the same size
+as input except in the dimension(s) dim where it is of size 1.
+Otherwise, dim is squeezed (see torch_squeeze), resulting in the
+output tensor having 1 (or len(dim)) fewer dimension(s).
If unbiased is False, then the variance will be calculated via the
+biased estimator. Otherwise, Bessel's correction will be used.
+if (torch_is_installed()) { + +a = torch_randn(c(1, 3)) +a +torch_var(a) + + +a = torch_randn(c(4, 4)) +a +torch_var(a, 1) +}
Var_mean
+| input | +(Tensor) the input tensor. |
+
|---|---|
| unbiased | +(bool) whether to use the unbiased estimation or not |
+
| dim | +(int or tuple of ints) the dimension or dimensions to reduce. |
+
| keepdim | +(bool) whether the output tensor has |
+
Returns the variance and mean of all elements in the input tensor.
If unbiased is False, then the variance will be calculated via the
+biased estimator. Otherwise, Bessel's correction will be used.
Returns the variance and mean of each row of the input tensor in the given
+dimension dim.
If keepdim is True, the output tensor is of the same size
+as input except in the dimension(s) dim where it is of size 1.
+Otherwise, dim is squeezed (see torch_squeeze), resulting in the
+output tensor having 1 (or len(dim)) fewer dimension(s).
If unbiased is False, then the variance will be calculated via the
+biased estimator. Otherwise, Bessel's correction will be used.
+if (torch_is_installed()) { + +a = torch_randn(c(1, 3)) +a +torch_var_mean(a) + + +a = torch_randn(c(4, 4)) +a +torch_var_mean(a, 1) +}
Where
+| condition | +(BoolTensor) When True (nonzero), yield x, otherwise yield y |
+
|---|---|
| x | +(Tensor) values selected at indices where |
+
| y | +(Tensor) values selected at indices where |
+
The tensors condition, x, y must be broadcastable.
See also torch_nonzero.
Return a tensor of elements selected from either x or y, depending on condition.
The operation is defined as:
+$$ + \mbox{out}_i = \left\{ \begin{array}{ll} + \mbox{x}_i & \mbox{if } \mbox{condition}_i \\ + \mbox{y}_i & \mbox{otherwise} \\ + \end{array} + \right. +$$
+torch_where(condition) is identical to
+torch_nonzero(condition, as_tuple=True).
+if (torch_is_installed()) { + +if (FALSE) { +x = torch_randn(c(3, 2)) +y = torch_ones(c(3, 2)) +x +torch_where(x > 0, x, y) +} + + + +}
Zeros
+| size | +(int...) a sequence of integers defining the shape of the output tensor. Can be a variable number of arguments or a collection like a list or tuple. |
+
|---|---|
| out | +(Tensor, optional) the output tensor. |
+
| dtype | +( |
+
| layout | +( |
+
| device | +( |
+
| requires_grad | +(bool, optional) If autograd should record operations on the returned tensor. Default: |
+
Returns a tensor filled with the scalar value 0, with the shape defined
+by the variable argument size.
+
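A minimal illustrative call:

if (torch_is_installed()) {
  torch_zeros(c(2, 3))
  torch_zeros(5, dtype = torch_int64())
}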
Zeros_like
+| input | +(Tensor) the size of |
+
|---|---|
| dtype | +( |
+
| layout | +( |
+
| device | +( |
+
| requires_grad | +(bool, optional) If autograd should record operations on the returned tensor. Default: |
+
| memory_format | +( |
+
Returns a tensor filled with the scalar value 0, with the same size as
+input. torch_zeros_like(input) is equivalent to
+torch_zeros(input.size(), dtype=input.dtype, layout=input.layout, device=input.device).
As of 0.4, this function does not support an out keyword. As an alternative,
+the old torch_zeros_like(input, out=output) is equivalent to
+torch_zeros(input.size(), out=output).
+
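An illustrative sketch of the shape relationship:

if (torch_is_installed()) {
  x <- torch_randn(c(2, 3))
  torch_zeros_like(x)  # zeros with the same shape and dtype as x
}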
Context-manager that enables gradient calculation. +Enables gradient calculation, if it has been disabled via with_no_grad.
+with_enable_grad(code)+ +
| code | +code to be executed with gradient recording. |
+
|---|
This context manager is thread local; it will not affect computation in +other threads.
+ ++if (torch_is_installed()) { + +x <- torch_tensor(1, requires_grad=TRUE) +with_no_grad({ + with_enable_grad({ + y = x * 2 + }) +}) +y$backward() +x$grad + +}
Temporarily modify gradient recording.
+with_no_grad(code)+ +
| code | +code to be executed with no gradient recording. |
+
|---|
+if (torch_is_installed()) { +x <- torch_tensor(runif(5), requires_grad = TRUE) +with_no_grad({ + x$sub_(torch_tensor(as.numeric(1:5))) +}) +x +x$grad + +}