Revert "[Reland] fix missing-prototypes warnings in torch_cpu (Part 4) (
Browse files Browse the repository at this point in the history
pytorch#101949)"

This reverts commit 4f2c007.

Reverted pytorch#101949 on behalf of https://github.com/osalpekar due to As noted in @izaitsevfb's comment, we are still seeing linker errors, this time due to `nnc_prepacked_linear_clamp_run` being made a static function. ([comment](pytorch#101949 (comment)))
pytorchmergebot committed May 23, 2023
1 parent 45a8f69 commit 32ce06a
Showing 138 changed files with 772 additions and 572 deletions.
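Why making a function `static` can break the link: `static` at namespace scope gives the definition internal linkage, so its symbol is not exported from its translation unit, and any other object file that still declares and calls the function fails at link time even though every file compiles. Below is a minimal two-file sketch of the failure mode cited in the revert message, with hypothetical names (the real symbol was `nnc_prepacked_linear_clamp_run` in torch_cpu):

// util.cpp -- hypothetical stand-in for a torch_cpu source file. The reverted
// change added `static` to definitions like this one to silence
// -Wmissing-prototypes; `static` gives the function internal linkage, so its
// symbol is not visible to other object files.
static int prepacked_run(int x) {
  return 2 * x;
}

// caller.cpp -- a separate translation unit still declares the function with
// external linkage and calls it. Both files compile; the link fails.
int prepacked_run(int x);

int main() {
  return prepacked_run(21);
}

// $ g++ -c util.cpp caller.cpp && g++ util.o caller.o
// caller.o: undefined reference to `prepacked_run(int)'

The BUILD.bazel hunk below drops `-Wno-error=unused-function`, which the reverted change had added, presumably because a newly `static` function that its own translation unit never calls trips `-Wunused-function`.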
1 change: 0 additions & 1 deletion BUILD.bazel
@@ -1598,7 +1598,6 @@ TORCH_COPTS = COMMON_COPTS + [
     "-fvisibility-inlines-hidden",
     "-fno-math-errno ",
     "-fno-trapping-math",
-    "-Wno-error=unused-function",
 ]

 torch_sources = {
2 changes: 1 addition & 1 deletion aten/src/ATen/core/ivalue.cpp
@@ -763,7 +763,7 @@ IValueComparator getGreaterThanComparator(const IValue& v) {
   };
 }

-std::ostream& operator<<(std::ostream& out, const ivalue::EnumHolder& v) {
+static std::ostream& operator<<(std::ostream& out, const ivalue::EnumHolder& v) {
   out << v.qualifiedClassName() << "." << v.name();
   return out;
 }
2 changes: 1 addition & 1 deletion aten/src/ATen/core/ivalue_inl.h
@@ -1628,7 +1628,7 @@ struct ivalue::EnumHolder : c10::intrusive_ptr_target {

   TORCH_API friend std::ostream& operator<<(
       std::ostream& out,
-      const ivalue::EnumHolder& v);
+      const EnumHolder& v);

   TORCH_API const std::string qualifiedClassName() const;
4 changes: 2 additions & 2 deletions aten/src/ATen/functorch/BatchRulesReduceOps.cpp
@@ -405,7 +405,7 @@ static std::tuple<Tensor,optional<int64_t>> searchsorted_batch_rule(
   TORCH_INTERNAL_ASSERT(false);
 }

-static Tensor bucketize_decomp_Tensor(
+Tensor bucketize_decomp_Tensor(
     const Tensor& self,
     const Tensor& boundaries,
     bool out_int32,
@@ -415,7 +415,7 @@ static Tensor bucketize_decomp_Tensor(
   return at::searchsorted(boundaries, self, out_int32, right, nullopt, nullopt);
 }

-static Tensor bucketize_decomp_Scalar(
+Tensor bucketize_decomp_Scalar(
     const Scalar& self,
     const Tensor& boundaries,
     bool out_int32,
5 changes: 3 additions & 2 deletions aten/src/ATen/native/Activation.cpp
@@ -374,8 +374,8 @@ TORCH_IMPL_FUNC(softshrink_backward_out) (
   shrink_backward_stub(device_type(), *this, lambd);
 }

-#if AT_MKLDNN_ENABLED()
 static bool use_mkldnn(const Tensor& input) {
+#if AT_MKLDNN_ENABLED()
   if (!at::globalContext().userEnabledMkldnn()) {
     return false;
   }
@@ -386,8 +386,9 @@ static bool use_mkldnn(const Tensor& input) {
       (input.device().is_cpu() &&
        (((input.scalar_type() == kBFloat16) && mkldnn_bf16_device_check()) ||
         (input.scalar_type() == kFloat))); // input is dense layout and bfloat16/float32
-}
 #endif
+  return false;
+}

 TORCH_IMPL_FUNC(gelu_out_cpu) (
   const Tensor& self, c10::string_view approximate, const Tensor& result
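The `use_mkldnn` hunk above restores the guard-the-body pattern: `#if AT_MKLDNN_ENABLED()` wraps only the MKLDNN-specific logic, with an unconditional `return false;` so the function is defined in every build, whereas the reverted change guarded the whole definition. A generic sketch of the two styles, with a stand-in flag and names rather than the ATen source:

#include <cstdio>

#define FEATURE_ENABLED 0 // stand-in for a build flag like AT_MKLDNN_ENABLED()

// Style restored by the revert: only the body is conditional. The function
// exists in every build; without the feature it simply reports "unavailable".
static bool use_feature(int input) {
#if FEATURE_ENABLED
  return input > 0; // real check, compiled only when the feature is built in
#endif
  return false; // fallback keeps the definition complete in feature-less builds
}

#if FEATURE_ENABLED
// Style from the reverted change: the whole definition is guarded, so the
// symbol does not exist in feature-less builds and every caller needs its
// own #if guard.
static bool use_feature_guarded(int input) {
  return input > 0;
}
#endif

int main() {
  std::printf("use_feature(1) = %d\n", use_feature(1));
  return 0;
}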
6 changes: 3 additions & 3 deletions aten/src/ATen/native/BinaryOps.cpp
@@ -809,7 +809,7 @@ Tensor& arctan2_out(const Tensor& self, const Tensor& other, Tensor& result) {
   return at::atan2_out(result, self, other);
 }

-static Tensor& add_relu_impl(
+Tensor& add_relu_impl(
     Tensor& result, const Tensor& self, const Tensor& other, const Scalar& alpha) {
   auto iter = TensorIterator::binary_op(result, self, other);
   Scalar min_val;
@@ -1003,7 +1003,7 @@ Tensor& mul__scalar_sparse_csr(Tensor& self, const Scalar& other) {
   return self;
 }

-static Device correct_out_device(const Tensor& self, const Tensor& other) {
+Device correct_out_device(const Tensor& self, const Tensor& other) {
   if (self.device() == at::kCPU){
     return other.device();
   } else {
@@ -1049,7 +1049,7 @@ Tensor div_zerotensor(const Tensor& self, const Tensor& other) {
   }
 }

-static Tensor maybe_add_maybe_sub(const Tensor& self, const Tensor& other, const Scalar& alpha) {
+Tensor maybe_add_maybe_sub(const Tensor& self, const Tensor& other, const Scalar& alpha) {
   auto out_device = correct_out_device(self, other);
   // hack to use the TensorIterator to get the correct broadcasting and type promotion logic
   auto device_ = Device(DeviceType::Meta);
2 changes: 0 additions & 2 deletions aten/src/ATen/native/Convolution.cpp
@@ -770,7 +770,6 @@ static void check_input_same_type_as_parameters(
   check_input_same_type_as_parameters(input, weight, /*bias=*/ Tensor());
 }

-#if AT_MKLDNN_ENABLED()
 static void check_input_same_type_as_parameters(
     const Tensor& input,
     const Tensor& weight,
@@ -789,7 +788,6 @@ static void check_input_same_type_as_parameters(
     check_input_same_type_as_parameters(input, weight, bias);
   }
 }
-#endif

 static auto view4d(const at::Tensor& tensor) -> at::Tensor {
   TORCH_CHECK(tensor.ndimension() == 3,
1 change: 0 additions & 1 deletion aten/src/ATen/native/Copy.cpp
@@ -21,7 +21,6 @@
 #include <ATen/NativeFunctions.h>
 #else
 #include <ATen/ops/_copy_from.h>
-#include <ATen/ops/_propagate_xla_data.h>
 #include <ATen/ops/copy_native.h>
 #include <ATen/ops/empty.h>
 #include <ATen/ops/expand_copy.h>
5 changes: 0 additions & 5 deletions aten/src/ATen/native/LegacyBatching.cpp
@@ -3,11 +3,6 @@
 #include <ATen/WrapDimUtils.h>
 #include <ATen/LegacyVmapTransforms.h>

-#ifdef AT_PER_OPERATOR_HEADERS
-#include <ATen/ops/_add_batch_dim_native.h>
-#include <ATen/ops/_remove_batch_dim_native.h>
-#endif
-
 namespace at { namespace native {

 // Adds a batch dimension to the tensor `self` out-of-place
2 changes: 1 addition & 1 deletion aten/src/ATen/native/LinearAlgebra.cpp
@@ -1893,7 +1893,7 @@ The behavior depends on the dimensionality of the Tensors as follows:
 - Otherwise, we return bmm, after broadcasting and folding the batched dimensions if
   there's more than one
 */
-static Tensor _matmul_impl(
+Tensor _matmul_impl(
     Tensor& out,
     const Tensor& tensor1,
     const Tensor& tensor2) {
2 changes: 1 addition & 1 deletion aten/src/ATen/native/PackedSequence.cpp
@@ -20,7 +20,7 @@

 namespace at { namespace native {

-static void checkLongTensor(const Tensor& tensor) {
+void checkLongTensor(const Tensor& tensor) {
   TORCH_CHECK(tensor.dim() == 1 && tensor.device().type() == at::kCPU && tensor.scalar_type() == at::kLong,
               "'lengths' argument should be a 1D CPU int64 tensor, but got ",
               tensor.dim(), "D ", tensor.device().str(), " ", tensor.scalar_type(), " tensor");
2 changes: 1 addition & 1 deletion aten/src/ATen/native/RNN.cpp
@@ -1809,7 +1809,7 @@ std::tuple<Tensor, Tensor, Tensor> quantized_lstm_data(
       std::move(std::get<2>(results)));
 }

-static std::tuple<Tensor, Tensor, Tensor> quantized_lstm_data_legacy(
+std::tuple<Tensor, Tensor, Tensor> quantized_lstm_data_legacy(
     const Tensor& data,
     const Tensor& batch_sizes,
     c10::List<at::Tensor> hx_,
1 change: 0 additions & 1 deletion aten/src/ATen/native/Resize.cpp
@@ -11,7 +11,6 @@
 #include <ATen/ops/resize_as_native.h>
 #include <ATen/ops/resize_native.h>
 #include <ATen/ops/resize.h>
-#include <ATen/ops/_resize_output.h>
 #endif

 namespace at { namespace native {
12 changes: 8 additions & 4 deletions aten/src/ATen/native/TensorAdvancedIndexing.cpp
@@ -400,7 +400,7 @@ static void build_index_op(
   iter.build(config);
 }

-static void check_indices_on_cpu_or_selfdevice(
+void check_indices_on_cpu_or_selfdevice(
     const Tensor& self,
     const at::MaterializedIOptTensorListRef& indices) {
   auto dev = self.device();
@@ -965,7 +965,7 @@ TORCH_IMPL_FUNC(index_add_cpu_out)
   }
 }

-static void index_reduce_func_impl(
+void index_reduce_func_impl(
     const Tensor& self,
     int64_t dim,
     const Tensor& index,
@@ -1149,7 +1149,7 @@ static void check_indexarray_range(
   }
 }

-static Tensor & index_select_out_cpu_dim1_(
+Tensor & index_select_out_cpu_dim1_(
     Tensor & result_contig, const Tensor & self, const Tensor & index_contig) {

   auto self_contig = self.contiguous();
@@ -1379,6 +1379,10 @@ Tensor index_select_quantized_cpu_(const Tensor & self, int64_t dim, const Tenso
   return at::native::index_select_out_cpu_(self, dim, index, result);
 }

+Tensor index_select_backward(const Tensor& grad, at::IntArrayRef self_sizes, int64_t dim, const Tensor& index) {
+  return at::native::index_select_backward_symint(grad, c10::fromIntArrayRefSlow(self_sizes), dim, index);
+}
+
 Tensor index_select_backward_symint(const Tensor& grad, c10::SymIntArrayRef self_sizes, int64_t dim, const Tensor& index) {
   // for composite compliance, use out-of-place variant of
   // `index_add` if index tensor is a Tensor Subclass.
@@ -1533,7 +1537,7 @@ static void scatter_reduce_exclude_self_helper(
   });
 }

-static void _scatter_via_index_put(
+void _scatter_via_index_put(
     const Tensor& self,
     int64_t dim,
     const Tensor& index,
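The re-added `index_select_backward` overload above shows a common ATen forwarding pattern: the plain-integer entry point widens its sizes to symbolic integers and delegates to the `_symint` variant, keeping a single implementation. A self-contained sketch of that pattern with simplified stand-in types (not ATen's):

#include <cstdint>
#include <vector>

// Simplified stand-in for c10::SymInt: an integer that may later be symbolic.
struct SymInt {
  int64_t v;
};

// The "real" implementation works on symbolic sizes.
int64_t count_elements_symint(const std::vector<SymInt>& sizes) {
  int64_t n = 1;
  for (const auto& s : sizes) {
    n *= s.v;
  }
  return n;
}

// Thin non-symbolic overload: widen each concrete int64_t into a SymInt and
// forward, mirroring how index_select_backward delegates to the _symint variant.
int64_t count_elements(const std::vector<int64_t>& sizes) {
  std::vector<SymInt> sym;
  sym.reserve(sizes.size());
  for (int64_t s : sizes) {
    sym.push_back(SymInt{s});
  }
  return count_elements_symint(sym);
}

int main() {
  std::vector<int64_t> sizes{2, 3, 4};
  return count_elements(sizes) == 24 ? 0 : 1; // 2*3*4 elements
}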
6 changes: 3 additions & 3 deletions aten/src/ATen/native/TensorConversions.cpp
@@ -1009,7 +1009,7 @@ Tensor dense_to_sparse_bsc(const Tensor& self, IntArrayRef blocksize, c10::optio
   return dense_to_sparse_compressed<Layout::SparseBsc>(self, blocksize, dense_dim_opt);
 }

-static void _check_blocksize_matches(
+void _check_blocksize_matches(
     const Tensor& self,
     c10::optional<IntArrayRef> blocksize_opt,
     const std::string& name) {
@@ -1023,7 +1023,7 @@ static void _check_blocksize_matches(
   }
 }

-static Tensor sparse_compressed_clone(
+Tensor sparse_compressed_clone(
     const Tensor& self,
     c10::optional<IntArrayRef> blocksize,
     const std::string& name) {
@@ -1046,7 +1046,7 @@ static Tensor sparse_compressed_clone(
       values.device());
 }

-static Tensor sparse_compressed_to_flipped(
+Tensor sparse_compressed_to_flipped(
     const Tensor& self,
     c10::optional<IntArrayRef> blocksize,
     const std::string& name) {
1 change: 0 additions & 1 deletion aten/src/ATen/native/Unfold3d.cpp
@@ -1,6 +1,5 @@
 #define TORCH_ASSERT_ONLY_METHOD_OPERATORS
 #include <ATen/core/Tensor.h>
-#include <ATen/native/Unfold3d.h>
 #include <ATen/Config.h>
 #include <ATen/Dispatch.h>
 #include <ATen/Parallel.h>
2 changes: 0 additions & 2 deletions aten/src/ATen/native/WeightNorm.cpp
@@ -10,8 +10,6 @@
 #else
 #include <ATen/ops/_weight_norm_differentiable_backward_native.h>
 #include <ATen/ops/_weight_norm_interface.h>
-#include <ATen/ops/_weight_norm_interface_backward_native.h>
-#include <ATen/ops/_weight_norm_interface_native.h>
 #include <ATen/ops/_weight_norm_native.h>
 #include <ATen/ops/empty_strided.h>
 #include <ATen/ops/norm_except_dim.h>
4 changes: 2 additions & 2 deletions aten/src/ATen/native/cpu/PowKernel.cpp
@@ -13,7 +13,7 @@ namespace at::native {

 inline namespace CPU_CAPABILITY {

-static void pow_tensor_tensor_kernel(TensorIteratorBase& iter) {
+void pow_tensor_tensor_kernel(TensorIteratorBase& iter) {
   const auto dtype = iter.common_dtype();
   if (isFloatingType(dtype) || isComplexType(dtype)) {
     AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND2(kHalf, kBFloat16, dtype, "pow", [&]() {
@@ -90,7 +90,7 @@ void reciprocal_kernel(TensorIteratorBase& iter);
 void rsqrt_kernel(TensorIteratorBase& iter);
 void sqrt_kernel(TensorIteratorBase& iter);

-static void pow_tensor_scalar_kernel(
+void pow_tensor_scalar_kernel(
     TensorIteratorBase& iter,
     const Scalar& exp_scalar) {
   // prevent multiple calls to iter.common_dtype()
2 changes: 1 addition & 1 deletion aten/src/ATen/native/mkl/SparseBlasImpl.cpp
@@ -32,7 +32,6 @@ namespace mkl {

 namespace {

-#if AT_USE_MKL_SPARSE()
 c10::MaybeOwned<Tensor> prepare_dense_matrix_for_mkl(
     const Tensor& tensor) {
   if (tensor.is_non_overlapping_and_dense() ||
@@ -111,6 +110,7 @@ void inline col_indices_and_values_resize_(const Tensor& input, int64_t nnz) {
 /*
   Resizes `input` tensor and fills it with the data from MKL.
 */
+#if AT_USE_MKL_SPARSE()
 template <typename scalar_t>
 void mkl_result_copy_(const Tensor& input, sparse_matrix_t mkl_desc) {
   sparse_index_base_t indexing = SPARSE_INDEX_BASE_ZERO;
2 changes: 1 addition & 1 deletion aten/src/ATen/native/nested/NestedTensorFactories.cpp
@@ -6,7 +6,7 @@
 namespace at {
 namespace native {

-static TensorOptions verify_empty_parameters(
+TensorOptions verify_empty_parameters(
     const at::Tensor& self,
     c10::optional<ScalarType> dtype,
     c10::optional<Layout> layout,
58 changes: 58 additions & 0 deletions aten/src/ATen/native/nested/NestedTensorMatmul.cpp
@@ -79,6 +79,64 @@ Tensor bmm_nested(const Tensor& self, const Tensor& mat2) {
   return output;
 }

+// utilities support `matmul_nested`
+namespace {
+// Args:
+//     self_sizes: the sizes of `self` in `matmul_nested`
+//     mat2_sizes: the sizes of `mat2` in `matmul_nested`
+//     buffer_op: the options for new buffer
+//     sizemat_op: the options for new size matrix
+// Returns:
+//     the batch size of each input underlying tensor, i.e. the product of batch-dimension sizes
+//     the empty output nested tensor
+inline std::tuple<std::vector<int64_t>, Tensor>
+matmul_nested_helper(
+    const std::vector<IntArrayRef>& self_sizes,
+    const std::vector<IntArrayRef>& mat2_sizes,
+    const c10::TensorOptions& buffer_op,
+    const c10::TensorOptions& sizemat_op) {
+  int64_t ntensors = self_sizes.size(),
+      ndims = self_sizes[0].size();
+  std::vector<int64_t> batch_sizes(ntensors, 1);
+  Tensor sizemat = at::empty({ntensors, ndims}, sizemat_op);
+  int64_t* sizemat_ptr = sizemat.mutable_data_ptr<int64_t>();
+  int64_t numel = 0;
+  for (int64_t i = 0; i < ntensors; i++) {
+    const IntArrayRef& self_size = self_sizes[i],
+        & mat2_size = mat2_sizes[i];
+    int64_t& batch_size = batch_sizes[i];
+    // batch dimensions
+    for (int64_t j = 0; j < ndims - 2; j++) {
+      const int64_t& self_sizej = self_size[j],
+          & mat2_sizej = mat2_size[j];
+      TORCH_CHECK(
+          self_sizej == mat2_sizej,
+          "matmul: For nested tensors, no broadcasting is currently performed: ",
+          i, "-th nested matrices in batch at dimension ", j + 1,
+          " have mismatching sizes ", self_sizej, " and ", mat2_sizej);
+      sizemat_ptr[j] = self_sizej;
+      batch_size *= sizemat_ptr[j];
+    }
+    // matrix multiplication dimensions
+    const int64_t& self_size0 = self_size[ndims - 2], & self_size1 = self_size[ndims - 1],
+        & mat2_size0 = mat2_size[ndims - 2], & mat2_size1 = mat2_size[ndims - 1];
+    TORCH_CHECK(
+        self_size1 == mat2_size0,
+        "matmul: ",
+        i, "-th nested matrices in batch cannot be multiplied (",
+        self_size0, "x", self_size1, " and ",
+        mat2_size0, "x", mat2_size1, ")");
+    sizemat_ptr[ndims - 2] = self_size0;
+    sizemat_ptr[ndims - 1] = mat2_size1;
+    sizemat_ptr += ndims;
+    numel += batch_size * self_size0 * mat2_size1;
+  }
+  Tensor buffer = at::empty(numel, buffer_op);
+  Tensor output = wrap_buffer(buffer, sizemat);
+  return std::make_tuple(batch_sizes, output);
+}
+}
+
 Tensor matmul_with_bmm_nested(const Tensor& self, const Tensor& mat2) {
   // Tensor self = self_.contiguous();
   // Tensor mat2 = mat2_.contiguous();
aten/src/ATen/native/quantized/FakeQuantPerChannelAffine.cpp
@@ -128,7 +128,7 @@ Tensor fake_quantize_per_channel_affine_cachemask_backward(
   return dY * mask;
 }

-static Tensor _get_rounded_zero_point(
+Tensor _get_rounded_zero_point(
     const Tensor& zero_point,
     int64_t quant_min,
     int64_t quant_max) {
aten/src/ATen/native/quantized/FakeQuantPerTensorAffine.cpp
@@ -133,7 +133,7 @@ Tensor fake_quantize_per_tensor_affine_cachemask_backward(
   return dY * mask;
 }

-static int64_t _get_zero_point_from_tensor(
+int64_t _get_zero_point_from_tensor(
     const Tensor& zero_point,
     int64_t quant_min,
     int64_t quant_max,
2 changes: 1 addition & 1 deletion aten/src/ATen/native/quantized/QTensor.cpp
@@ -285,7 +285,7 @@ std::tuple<double, int64_t> _choose_qparams_per_tensor(
   return std::make_tuple(q_params.scale, q_params.zero_point);
 }

-static float calculate_quant_loss(
+float calculate_quant_loss(
     const float* input,
     int numel,
     float xmin,
