forked from openkylin/vc
Compare commits
No commits in common. "pristine-tar" and "openkylin/yangtze" have entirely different histories.

@@ -0,0 +1,130 @@
BasedOnStyle: Google

# The extra indent or outdent of access modifiers, e.g. public:.
AccessModifierOffset: -4

# If true, aligns escaped newlines as far left as possible. Otherwise puts them into the right-most column.
AlignEscapedNewlinesLeft: false

# If true, aligns trailing comments.
AlignTrailingComments: true

# Allow putting all parameters of a function declaration onto the next line even if BinPackParameters is false.
AllowAllParametersOfDeclarationOnNextLine: false

# If true, if (a) return; can be put on a single line.
AllowShortIfStatementsOnASingleLine: false

# If true, while (true) continue; can be put on a single line.
AllowShortLoopsOnASingleLine: false

AllowShortFunctionsOnASingleLine: true

# If true, always break before multiline string literals.
AlwaysBreakBeforeMultilineStrings: false

# If true, always break after the template<...> of a template declaration.
AlwaysBreakTemplateDeclarations: false

# If false, a function call's or function definition's parameters will either all be on the same line or will have one line each.
BinPackParameters: true

# If true, binary operators will be placed after line breaks.
BreakBeforeBinaryOperators: false

# The brace breaking style to use.
# Possible values:
# BS_Attach (in configuration: Attach) Always attach braces to surrounding context.
# BS_Linux (in configuration: Linux) Like Attach, but break before braces on function, namespace and class definitions.
# BS_Stroustrup (in configuration: Stroustrup) Like Attach, but break before function definitions.
# BS_Allman (in configuration: Allman) Always break before braces.
BreakBeforeBraces: Linux

# Always break constructor initializers before commas and align the commas with the colon.
BreakConstructorInitializersBeforeComma: true

# The column limit.
# A column limit of 0 means that there is no column limit. In this case, clang-format will respect the input's line breaking decisions within statements.
ColumnLimit: 90

# If the constructor initializers don't fit on a line, put each initializer on its own line.
#ConstructorInitializerAllOnOneLineOrOnePerLine (bool)

# The number of characters to use for indentation of constructor initializer lists.
#ConstructorInitializerIndentWidth (unsigned)

# If true, format braced lists as best suited for C++11 braced lists.
# Important differences: - No spaces inside the braced list. - No line break before the closing brace. - Indentation with the continuation indent, not with the block indent.
# Fundamentally, C++11 braced lists are formatted exactly like function calls would be formatted in their place. If the braced list follows a name (e.g. a type or variable name), clang-format formats as if the {} were the parentheses of a function call with that name. If there is no name, a zero-length name is assumed.
Cpp11BracedListStyle: true

# If true, analyze the formatted file for the most common binding.
#DerivePointerBinding (bool)

# If true, clang-format detects whether function calls and definitions are formatted with one parameter per line.
# Each call can be bin-packed, one-per-line or inconclusive. If it is inconclusive, e.g. completely on one line, but a decision needs to be made, clang-format analyzes whether there are other bin-packed cases in the input file and acts accordingly.
# NOTE: This is an experimental flag, that might go away or be renamed. Do not use this in config files, etc. Use at your own risk.
#ExperimentalAutoDetectBinPacking (bool)

# Indent case labels one level from the switch statement.
# When false, use the same indentation level as for the switch statement. Switch statement body is always indented one level more than case labels.
IndentCaseLabels: false

# If true, indent when breaking function declarations which are not also definitions after the type.
#IndentFunctionDeclarationAfterType (bool)

# The number of characters to use for indentation.
IndentWidth: 4

# The maximum number of consecutive empty lines to keep.
MaxEmptyLinesToKeep: 1

# The indentation used for namespaces.
# Possible values:
# NI_None (in configuration: None) Don't indent in namespaces.
# NI_Inner (in configuration: Inner) Indent only in inner namespaces (nested in other namespaces).
# NI_All (in configuration: All) Indent in all namespaces.
NamespaceIndentation: None

# Add a space in front of an Objective-C protocol list, i.e. use Foo <Protocol> instead of Foo<Protocol>.
#ObjCSpaceBeforeProtocolList (bool)

# The penalty for each line break introduced inside a comment.
#PenaltyBreakComment (unsigned)

# The penalty for breaking before the first <<.
#PenaltyBreakFirstLessLess (unsigned)

# The penalty for each line break introduced inside a string literal.
#PenaltyBreakString (unsigned)
# The penalty for each character outside of the column limit.
#PenaltyExcessCharacter (unsigned)
# Penalty for putting the return type of a function onto its own line.
#PenaltyReturnTypeOnItsOwnLine (unsigned)
# Set whether & and * bind to the type as opposed to the variable.
#PointerBindsToType: false
# If true, spaces will be inserted between 'for'/'if'/'while'/... and '('.
#SpaceAfterControlStatementKeyword: true
# If false, spaces will be removed before '=', '+=', etc.
#SpaceBeforeAssignmentOperators: true
# If false, spaces may be inserted into '()'.
#SpaceInEmptyParentheses: false
# The number of spaces before trailing line comments.
#SpacesBeforeTrailingComments (unsigned)
# If false, spaces may be inserted into C style casts.
#SpacesInCStyleCastParentheses (bool)

# If true, spaces will be inserted after every '(' and before every ')'.
SpacesInParentheses: false

# Format compatible with this standard, e.g. use A<A<int> > instead of A<A<int>> for LS_Cpp03.
# Possible values:
# LS_Cpp03 (in configuration: Cpp03) Use C++03-compatible syntax.
# LS_Cpp11 (in configuration: Cpp11) Use features of C++11 (e.g. A<A<int>> instead of A<A<int> >).
# LS_Auto (in configuration: Auto) Automatic detection based on the input.
Standard: Cpp11

# If true, IndentWidth consecutive spaces will be replaced with tab characters.
UseTab: false

# vim: ft=yaml

@@ -0,0 +1,77 @@
## Copyright and License

Vc is licensed under the [3-clause BSD license](http://opensource.org/licenses/BSD-3-Clause).
Your contributions to Vc must be released under the same license. You must add
your copyright information to the files you modified/added.

## Code Formatting & Style

The recommended way is to format the code with `clang-format`, using the
`.clang-format` file in the repository.

In addition to the `clang-format` style, `if`, `else`, `for`, `while`, and `do`
*must* use braces.

If, for some reason, you cannot use `clang-format`, here's a quick overview of
the style rules (see the sketch after this list):
* Constrain the code to no more than 90 characters per line.
* Use four spaces for indent. No tabs.
* Opening braces attach to the preceding expression, except for functions,
  namespaces, and classes/structs/unions/enums.
* Namespaces introduce no additional indent.
* `case` labels are aligned with the `switch` statement.
* No more than one empty line.
* No spaces in parentheses, but spaces between keywords and opening paren, i.e.
  `if (foo) { bar(); }`
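
A minimal sketch of these rules in practice (the names are hypothetical and only
for illustration):

```cpp
namespace Example  // namespaces introduce no additional indent
{
class Counter
{  // brace on its own line for class and function definitions
public:
    int value() const { return counterValue; }

    void update(int x)
    {
        switch (x) {  // attached brace; case labels align with the switch
        case 0:
            ++counterValue;
            break;
        default:
            break;
        }
        if (counterValue > 90) {  // space between keyword and paren; braces required
            counterValue = 0;
        }
    }

private:
    int counterValue = 0;
};
}  // namespace Example
```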

### Naming Rules

* Naming is very important. Take time to choose a name that clearly explains the
  intended functionality & usage of the entity. The sketch after this list
  illustrates the conventions.
* Type names typically use `CamelCase`. No underscores.
* Function and variable names use `camelCase`. No underscores.
* Acronyms that appear in camel case names must use lowercase letters for all
  characters after the first character. (e.g. `SimdArray`, `simdFunction`)
* Traits use `lower_case_with_underscores`.
* Macros are prefixed with `Vc_` and use `Vc_ALL_CAPITALS_WITH_UNDERSCORES`.
  Macro arguments use a single underscore suffix.
  Include guards are prefixed with `VC_` instead.
* File names use `alllowercasewithoutunderscores`. Basically, it is the type name
  declared/defined in the file with all letters in lower case.
* There are exceptions and inconsistencies in the code. Don't bother.
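
A hypothetical example, assuming a file `simddemo.h`, that puts the naming
conventions side by side:

```cpp
#ifndef VC_SIMDDEMO_H_  // include guards use the VC_ prefix
#define VC_SIMDDEMO_H_

// Type names: CamelCase; acronyms lose their capitals after the first letter.
class SimdDemo {};

// Traits: lower_case_with_underscores.
template <typename T> struct is_simd_demo {
    static constexpr bool value = false;
};

// Functions and variables: camelCase, no underscores.
inline int simdWidthOf(const SimdDemo &demoObject) { return 1; }

// Macros: Vc_ prefix; macro arguments take a single underscore suffix.
#define Vc_STRINGIFY_DEMO(arg_) #arg_

#endif  // VC_SIMDDEMO_H_
```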

### Design Guidelines

* *Avoid out parameters.* Use the return value instead. Use `std::tuple` if you
  need to return multiple values.
* *Look for alternatives to in-out parameters.* An obvious exception (and thus
  design alternative) is the implicit `this` parameter to non-static member
  functions.
* Consequently, *pass function parameters by const-ref or by value.*
  Use const-ref for types that (potentially) require more than two CPU
  registers. (Consider fundamental types and the fundamental `Vector<T>` types
  to require one register each.) Pass by value otherwise.
* *Ensure const-correctness.* Member functions use the `const` qualifier if they
  do not modify observable state. Use `mutable` members for unobservable state.
* *Avoid macros.* Possible alternatives are constexpr variables and template
  code. A short sketch of these guidelines follows below.
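
A minimal sketch, with hypothetical types, of the return-value and
const-correctness guidelines:

```cpp
#include <tuple>

struct Interval {
    float lower, upper;
};

// Return both results as a std::tuple instead of filling out parameters.
inline std::tuple<float, float> widthAndMidpoint(Interval range)  // small type: by value
{
    return std::make_tuple(range.upper - range.lower,
                           0.5f * (range.lower + range.upper));
}

class Accumulator
{
public:
    void add(float x) { sum += x; ++count; cacheValid = false; }

    // const: observable state is untouched; the cache is unobservable, hence mutable.
    float mean() const
    {
        if (!cacheValid) {
            cachedMean = count > 0 ? sum / count : 0.f;
            cacheValid = true;
        }
        return cachedMean;
    }

private:
    float sum = 0.f;
    int count = 0;
    mutable float cachedMean = 0.f;
    mutable bool cacheValid = false;
};
```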

## Git History

Git history should be flat, if feasible. Feel free to use merges on your private
branch. However, once you submit a pull request, the history should apply
cleanly on top of master. Use `git rebase [-i]` to straighten the history.

Use different branches for different issues.

## Git Commit Logs

1. Write meaningful summaries and strive to use no more than 50 characters
1. Use imperative mood in the subject line (and possibly in bullet points in the
   summary)
1. Wrap the body at 72 characters
1. Use the body to explain *what* and *why* (normally it is irrelevant *how* you
   did it)

See also [Chris Beams' article](http://chris.beams.io/posts/git-commit/).

@@ -0,0 +1,18 @@
<!--
Vc is now in maintenance mode and no longer actively developed.
However, we continue to review pull requests with bugfixes from the community.
If your issue is trivial to fix, we might be able to address it.
Otherwise, please provide a pull request in addition to your issue.
-->

Vc version / revision | Operating System | Compiler & Version | Compiler Flags | Assembler & Version | CPU
----------------------|------------------|--------------------|----------------|---------------------|----
                      |                  |                    |                |                     |

## Testcase
```cpp
```

## Actual Results

## Expected Results

@@ -0,0 +1,85 @@
name: CI
on:
  push:
  pull_request:
  schedule:
    - cron: '0 3 * * *'
jobs:
  #clang-format:
  #  runs-on: ubuntu-latest
  #  steps:
  #    - uses: actions/checkout@v2
  #    - uses: DoozyX/clang-format-lint-action@v0.12
  #      with:
  #        exclude: './thirdparty'
  #        clangFormatVersion: 12

  build-ubuntu:
    runs-on: ubuntu-latest
    env:
      dashboard_model: Experimental
      build_type: ${{ matrix.build_type }}
      NUMBER_OF_PROCESSORS: 2
      CXX: ${{ matrix.cxx }}
    strategy:
      fail-fast: false
      matrix:
        build_type: [Debug, Release]
        cxx: [g++-9, g++-10, g++-11, clang++-10, clang++-11, clang++-12, icpc]
        include:
          - cxx: g++-11
            INSTALL_EXTRA: g++-11
          - cxx: clang++-11
            INSTALL_EXTRA: clang-11
          - cxx: clang++-12
            INSTALL_EXTRA: clang-12
          - cxx: icpc
            INSTALL_ONEAPI: true
        exclude:
          # icpc in debug mode runs out of memory in CI
          - cxx: icpc
            build_type: Debug
    steps:
      - uses: actions/checkout@v2
        with:
          submodules: true
      - name: install OneAPI
        if: ${{ matrix.INSTALL_ONEAPI }}
        run: |
          wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
          sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
          sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
          sudo apt update
          sudo apt install intel-oneapi-compiler-dpcpp-cpp-and-cpp-classic
      - name: install extras
        if: ${{ matrix.INSTALL_EXTRA }}
        run: |
          sudo apt update
          sudo apt install ${{ matrix.INSTALL_EXTRA }}
      - name: ctest
        run: |
          if [ ${{ matrix.INSTALL_ONEAPI }} ]
          then
            source /opt/intel/oneapi/setvars.sh
            export LC_ALL=en_US.utf8
          fi
          $CXX --version
          ctest -VV -S test.cmake

  build-windows:
    runs-on: ${{ matrix.os }}
    env:
      build_type: ${{ matrix.build_type }}
    strategy:
      fail-fast: false
      matrix:
        build_type: [Debug, Release]
        os: [windows-2019]
    steps:
      - uses: actions/checkout@v2
        with:
          submodules: true
      - uses: egor-tensin/vs-shell@v2
      - name: ctest
        run: |
          ctest -VV -S test.cmake

@@ -0,0 +1,11 @@
doc/html
doc/latex
doc/man
vc-benchmarks
*.swp
*~
.makeApidox.stamp
.makeApidox.stamp.new
build-*
.vs
out

@@ -0,0 +1,6 @@
[submodule "tests/testdata"]
	path = tests/testdata
	url = https://github.com/VcDevel/vc-testdata
[submodule "tests/virtest"]
	path = tests/virtest
	url = https://github.com/mattkretz/virtest

@@ -0,0 +1,275 @@
cmake_minimum_required(VERSION 3.0)

cmake_policy(SET CMP0028 NEW) # Double colon in target name means ALIAS or IMPORTED target.
cmake_policy(SET CMP0048 NEW) # The ``project()`` command manages VERSION variables.

if(PROJECT_SOURCE_DIR STREQUAL PROJECT_BINARY_DIR)
  message(FATAL_ERROR "You don't want to configure in the source directory!")
endif()

if(NOT DEFINED CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release CACHE STRING
    "Choose the type of build, options are: None Debug Release RelWithDebug RelWithDebInfo MinSizeRel."
    FORCE)
endif()

# read version parts from version.h
file(STRINGS ${CMAKE_CURRENT_SOURCE_DIR}/Vc/version.h _version_lines REGEX "^#define Vc_VERSION_STRING ")
string(REGEX MATCH "([0-9]+)\\.([0-9]+)\\.([0-9]+)" _version_matches "${_version_lines}")

project(Vc VERSION "${CMAKE_MATCH_1}.${CMAKE_MATCH_2}.${CMAKE_MATCH_3}" LANGUAGES C CXX)
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake")

set(disabled_targets)

include (VcMacros)
include (AddTargetProperty)
include (OptimizeForArchitecture)

vc_determine_compiler()

if("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "(i686|x86|AMD64|amd64)")
  set(Vc_X86 TRUE)
elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "(arm|aarch32|aarch64)")
  message(WARNING "No optimized implementation of the Vc types available for ${CMAKE_SYSTEM_PROCESSOR}")
  set(Vc_ARM TRUE)
else()
  message(WARNING "No optimized implementation of the Vc types available for ${CMAKE_SYSTEM_PROCESSOR}")
endif()

option(USE_CCACHE "If enabled, ccache will be used (if it exists on the system) to speed up recompiles." OFF)
if(USE_CCACHE)
  find_program(CCACHE_COMMAND ccache)
  if(CCACHE_COMMAND)
    mark_as_advanced(CCACHE_COMMAND)
    set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CCACHE_COMMAND}")
  endif()
endif()

if(NOT Vc_COMPILER_IS_MSVC)
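  # Try the newest supported standard flag first and fall back, step by step, to
  # the oldest spelling that still enables C++11.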
AddCompilerFlag("-std=c++14" CXX_RESULT _ok CXX_FLAGS CMAKE_CXX_FLAGS)
|
||||
if(NOT _ok)
|
||||
AddCompilerFlag("-std=c++1y" CXX_RESULT _ok CXX_FLAGS CMAKE_CXX_FLAGS)
|
||||
if(NOT _ok)
|
||||
AddCompilerFlag("-std=c++11" CXX_RESULT _ok CXX_FLAGS CMAKE_CXX_FLAGS)
|
||||
if(NOT _ok)
|
||||
AddCompilerFlag("-std=c++0x" CXX_RESULT _ok CXX_FLAGS CMAKE_CXX_FLAGS)
|
||||
if(NOT _ok)
|
||||
message(FATAL_ERROR "Vc 1.x requires C++11, better even C++14. It seems this is not available. If this was incorrectly determined please notify vc-devel@compeng.uni-frankfurt.de")
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
elseif(MSVC_VERSION LESS 1920)
|
||||
message(FATAL_ERROR "Vc 1.x requires at least Visual Studio 2019.")
|
||||
AddCompilerFlag("/std:c++14" CXX_RESULT _ok CXX_FLAGS CMAKE_CXX_FLAGS)
|
||||
endif()
|
||||
|
||||
if(MSVC AND (NOT DEFINED Vc_USE_MSVC_SSA_OPTIMIZER_DESPITE_BUGGY_EXP OR NOT Vc_USE_MSVC_SSA_OPTIMIZER_DESPITE_BUGGY_EXP))
|
||||
# bug report: https://developercommunity.visualstudio.com/t/AVX-codegen-bug-on-Vc-with-MSVC-2019/1470844#T-N1521672
|
||||
message(STATUS "WARNING! MSVC starting with 19.20 uses a new optimizer that has a bug causing Vc::exp() to return slighly wrong results.\
|
||||
You can set Vc_USE_MSVC_SSA_OPTIMIZER_DESPITE_BUGGY_EXP=ON to still use the new optimizer on the affected MSVC versions.")
|
||||
AddCompilerFlag("/d2SSAOptimizer-" CXX_RESULT _ok CXX_FLAGS CMAKE_CXX_FLAGS)
|
||||
endif()
|
||||
|
||||
if(Vc_COMPILER_IS_GCC)
|
||||
if(Vc_GCC_VERSION VERSION_GREATER "5.0.0" AND Vc_GCC_VERSION VERSION_LESS "6.0.0")
|
||||
UserWarning("GCC 5 goes into an endless loop comiling example_scaling_scalar. Therefore, this target is disabled.")
|
||||
list(APPEND disabled_targets
|
||||
example_scaling_scalar
|
||||
)
|
||||
endif()
|
||||
elseif(Vc_COMPILER_IS_MSVC)
|
||||
# Disable warning "C++ exception specification ignored except to indicate a function is not __declspec(nothrow)"
|
||||
# MSVC emits the warning for the _UnitTest_Compare desctructor which needs the throw declaration so that it doesn't std::terminate
|
||||
AddCompilerFlag("/wd4290")
|
||||
endif()
|
||||
|
||||
vc_set_preferred_compiler_flags(WARNING_FLAGS BUILDTYPE_FLAGS)
|
||||
|
||||
add_definitions(${Vc_DEFINITIONS})
|
||||
add_compile_options(${Vc_COMPILE_FLAGS})
|
||||
|
||||
if(Vc_COMPILER_IS_INTEL)
|
||||
# per default icc is not IEEE compliant, but we need that for verification
|
||||
AddCompilerFlag("-fp-model source")
|
||||
endif()
|
||||
|
||||
if(CMAKE_BUILD_TYPE STREQUAL "" AND NOT CMAKE_CXX_FLAGS MATCHES "-O[123]")
|
||||
message(STATUS "WARNING! It seems you are compiling without optimization. Please set CMAKE_BUILD_TYPE.")
|
||||
endif(CMAKE_BUILD_TYPE STREQUAL "" AND NOT CMAKE_CXX_FLAGS MATCHES "-O[123]")
|
||||
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}) # ${CMAKE_CURRENT_SOURCE_DIR}/include)
|
||||
|
||||
add_custom_target(other VERBATIM)
|
||||
add_custom_target(Scalar COMMENT "build Scalar code" VERBATIM)
|
||||
add_custom_target(SSE COMMENT "build SSE code" VERBATIM)
|
||||
add_custom_target(AVX COMMENT "build AVX code" VERBATIM)
|
||||
add_custom_target(AVX2 COMMENT "build AVX2 code" VERBATIM)
|
||||
|
||||
AddCompilerFlag(-ftemplate-depth=128 CXX_FLAGS CMAKE_CXX_FLAGS)
|
||||
|
||||
set(libvc_compile_flags "-DVc_COMPILE_LIB")
|
||||
AddCompilerFlag("-fPIC" CXX_FLAGS libvc_compile_flags)
|
||||
|
||||
# -fstack-protector is the default of GCC, but at least Ubuntu changes the default to -fstack-protector-strong, which is crazy
|
||||
AddCompilerFlag("-fstack-protector" CXX_FLAGS libvc_compile_flags)
|
||||
|
||||
set(_srcs src/const.cpp)
|
||||
if(Vc_X86)
|
||||
list(APPEND _srcs src/cpuid.cpp src/support_x86.cpp)
|
||||
vc_compile_for_all_implementations(_srcs src/trigonometric.cpp ONLY SSE2 SSE3 SSSE3 SSE4_1 AVX AVX+FMA AVX2+FMA+BMI2)
|
||||
if(NOT Vc_XOP_INTRINSICS_BROKEN)
|
||||
vc_compile_for_all_implementations(_srcs src/trigonometric.cpp ONLY AVX+XOP+FMA)
|
||||
if(NOT Vc_FMA4_INTRINSICS_BROKEN)
|
||||
vc_compile_for_all_implementations(_srcs src/trigonometric.cpp ONLY SSE+XOP+FMA4 AVX+XOP+FMA4)
|
||||
endif()
|
||||
endif()
|
||||
vc_compile_for_all_implementations(_srcs src/sse_sorthelper.cpp ONLY SSE2 SSE4_1 AVX AVX2+FMA+BMI2)
|
||||
vc_compile_for_all_implementations(_srcs src/avx_sorthelper.cpp ONLY AVX AVX2+FMA+BMI2)
|
||||
elseif(Vc_ARM)
|
||||
list(APPEND _srcs src/support_dummy.cpp)
|
||||
else()
|
||||
list(APPEND _srcs src/support_dummy.cpp)
|
||||
endif()
|
||||
add_library(Vc STATIC ${_srcs})
|
||||
set_property(TARGET Vc APPEND PROPERTY COMPILE_OPTIONS ${libvc_compile_flags})
|
||||
add_target_property(Vc LABELS "other")
|
||||
if(XCODE)
|
||||
# TODO: document what this does and why it has no counterpart in the non-XCODE logic
|
||||
set_target_properties(Vc PROPERTIES XCODE_ATTRIBUTE_GCC_INLINES_ARE_PRIVATE_EXTERN "NO")
|
||||
set_target_properties(Vc PROPERTIES XCODE_ATTRIBUTE_GCC_SYMBOLS_PRIVATE_EXTERN "YES")
|
||||
set_target_properties(Vc PROPERTIES XCODE_ATTRIBUTE_CLANG_CXX_LANGUAGE_STANDARD "c++0x")
|
||||
set_target_properties(Vc PROPERTIES XCODE_ATTRIBUTE_CLANG_CXX_LIBRARY "libc++")
|
||||
elseif(UNIX AND Vc_COMPILER_IS_CLANG)
|
||||
# On UNIX (Linux) the standard library used by default typically is libstdc++ (GCC).
|
||||
# To get the full clang deal we rather want to build against libc++. This requires
|
||||
# additionally the libc++abi and libsupc++ libraries in all linker invokations.
|
||||
option(USE_LIBC++ "Use libc++ instead of the system default C++ standard library." OFF)
|
||||
if(USE_LIBC++)
|
||||
AddCompilerFlag(-stdlib=libc++ CXX_FLAGS CMAKE_CXX_FLAGS CXX_RESULT _use_libcxx)
|
||||
if(_use_libcxx)
|
||||
find_library(LIBC++ABI c++abi)
|
||||
mark_as_advanced(LIBC++ABI)
|
||||
if(LIBC++ABI)
|
||||
set(CMAKE_REQUIRED_LIBRARIES "${LIBC++ABI};supc++")
|
||||
CHECK_CXX_SOURCE_COMPILES("#include <stdexcept>
|
||||
#include <iostream>
|
||||
void foo() {
|
||||
std::cout << 'h' << std::flush << std::endl;
|
||||
throw std::exception();
|
||||
}
|
||||
int main() {
|
||||
try { foo(); }
|
||||
catch (int) { return 0; }
|
||||
return 1;
|
||||
}" libcxx_compiles)
|
||||
unset(CMAKE_REQUIRED_LIBRARIES)
|
||||
if(libcxx_compiles)
|
||||
link_libraries(${LIBC++ABI} supc++)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
else()
|
||||
CHECK_CXX_SOURCE_COMPILES("#include <tuple>
|
||||
std::tuple<int> f() { std::tuple<int> r; return r; }
|
||||
int main() { return 0; }
|
||||
" tuple_sanity)
|
||||
if (NOT tuple_sanity)
|
||||
message(FATAL_ERROR "Clang and std::tuple brokenness detected. Please update your compiler.")
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
add_dependencies(other Vc)
|
||||
target_include_directories(Vc
|
||||
PUBLIC
|
||||
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}>
|
||||
$<INSTALL_INTERFACE:include>
|
||||
)
|
||||
|
||||
option(Vc_ENABLE_INSTALL "Whether to install the library." ON)
|
||||
if (Vc_ENABLE_INSTALL)
|
||||
install(TARGETS Vc EXPORT VcTargets DESTINATION lib${LIB_SUFFIX})
|
||||
install(DIRECTORY Vc/ DESTINATION include/Vc FILES_MATCHING REGEX "/*.(h|tcc|def)$")
|
||||
install(FILES
|
||||
Vc/Allocator
|
||||
Vc/IO
|
||||
Vc/Memory
|
||||
Vc/SimdArray
|
||||
Vc/Utils
|
||||
Vc/Vc
|
||||
Vc/algorithm
|
||||
Vc/array
|
||||
Vc/iterators
|
||||
Vc/limits
|
||||
Vc/simdize
|
||||
Vc/span
|
||||
Vc/type_traits
|
||||
Vc/vector
|
||||
DESTINATION include/Vc)
|
||||
|
||||
# Generate and install CMake package and modules
|
||||
include(CMakePackageConfigHelpers)
|
||||
set(PACKAGE_INSTALL_DESTINATION
|
||||
lib${LIB_SUFFIX}/cmake/${PROJECT_NAME}
|
||||
)
|
||||
install(EXPORT ${PROJECT_NAME}Targets
|
||||
NAMESPACE ${PROJECT_NAME}::
|
||||
DESTINATION ${PACKAGE_INSTALL_DESTINATION}
|
||||
EXPORT_LINK_INTERFACE_LIBRARIES
|
||||
)
|
||||
write_basic_package_version_file(
|
||||
${CMAKE_CURRENT_BINARY_DIR}/cmake/${PROJECT_NAME}ConfigVersion.cmake
|
||||
VERSION ${PROJECT_VERSION}
|
||||
COMPATIBILITY AnyNewerVersion
|
||||
)
|
||||
configure_package_config_file(
|
||||
${PROJECT_SOURCE_DIR}/cmake/${PROJECT_NAME}Config.cmake.in
|
||||
${CMAKE_CURRENT_BINARY_DIR}/cmake/${PROJECT_NAME}Config.cmake
|
||||
INSTALL_DESTINATION ${PACKAGE_INSTALL_DESTINATION}
|
||||
PATH_VARS CMAKE_INSTALL_PREFIX
|
||||
)
|
||||
install(FILES
|
||||
cmake/UserWarning.cmake
|
||||
cmake/VcMacros.cmake
|
||||
cmake/AddCompilerFlag.cmake
|
||||
cmake/CheckCCompilerFlag.cmake
|
||||
cmake/CheckCXXCompilerFlag.cmake
|
||||
cmake/OptimizeForArchitecture.cmake
|
||||
cmake/FindVc.cmake
|
||||
${CMAKE_CURRENT_BINARY_DIR}/cmake/${PROJECT_NAME}Config.cmake
|
||||
${CMAKE_CURRENT_BINARY_DIR}/cmake/${PROJECT_NAME}ConfigVersion.cmake
|
||||
DESTINATION ${PACKAGE_INSTALL_DESTINATION}
|
||||
)
|
||||
endif()
|
||||
|
||||
option(BUILD_TESTING "Build the testing tree." OFF)
|
||||
include (CTest)
|
||||
configure_file(${PROJECT_SOURCE_DIR}/CTestCustom.cmake ${PROJECT_BINARY_DIR}/CTestCustom.cmake COPYONLY)
|
||||
if(BUILD_TESTING)
|
||||
add_custom_target(build_tests ALL VERBATIM)
|
||||
add_subdirectory(tests)
|
||||
endif()
|
||||
|
||||
set(BUILD_EXAMPLES FALSE CACHE BOOL "Build examples.")
|
||||
if(BUILD_EXAMPLES)
|
||||
add_subdirectory(examples)
|
||||
endif(BUILD_EXAMPLES)
|
||||
|
||||
# Hide Vc_IMPL as it is only meant for users of Vc
|
||||
mark_as_advanced(Vc_IMPL)
|
||||
|
||||
find_program(BIN_CAT cat)
|
||||
mark_as_advanced(BIN_CAT)
|
||||
if(BIN_CAT)
|
||||
file(REMOVE ${PROJECT_BINARY_DIR}/help.txt)
|
||||
add_custom_command(OUTPUT ${PROJECT_BINARY_DIR}/help.txt
|
||||
COMMAND ${CMAKE_MAKE_PROGRAM} help > ${PROJECT_BINARY_DIR}/help.txt
|
||||
VERBATIM
|
||||
)
|
||||
add_custom_target(cached_help
|
||||
${BIN_CAT} ${PROJECT_BINARY_DIR}/help.txt
|
||||
DEPENDS ${PROJECT_BINARY_DIR}/help.txt
|
||||
VERBATIM
|
||||
)
|
||||
endif()
|
|

@@ -0,0 +1,15 @@
set(CTEST_PROJECT_NAME "Vc")
set(CTEST_NIGHTLY_START_TIME "00:00:00 CEST")

set(CTEST_DROP_METHOD "http")
set(CTEST_DROP_SITE "cdash.cern.ch")
set(CTEST_DROP_LOCATION "/submit.php?project=Vc")

set(CTEST_DROP_SITE_CDASH TRUE)

set(CTEST_UPDATE_TYPE "git")

find_program(GITCOMMAND git)
set(CTEST_UPDATE_COMMAND "${GITCOMMAND}")

mark_as_advanced(GITCOMMAND)

@@ -0,0 +1,21 @@
set(CTEST_CUSTOM_WARNING_EXCEPTION ${CTEST_CUSTOM_WARNING_EXCEPTION}
  " C4723: " # MSVC 2012 can't suppress this warning
  " C4756: " # MSVC 2012 can't suppress this warning
  "used uninitialized in this function"
  "Skipping compilation of tests gatherStruct and gather2dim because of clang bug" # Not a helpful warning for the dashboard
  "warning is a GCC extension"
  "^-- " # Ignore output from cmake
  "AVX disabled per default because of old/broken compiler" # This warning is meant for users not the dashboard
  "WARNING non-zero return value in ctest from: make" # Ignore output from ctest
  "ipo: warning #11010:" # Ignore warning about incompatible libraries with ICC -m32 on 64-bit system
  "include/qt4" # -Wuninitialized in QWeakPointer(X *ptr)
  " note: " # Notes are additional lines from errors (or warnings) that we don't want to count as additional warnings
  "clang: warning: argument unused during compilation: '-stdlib=libc"
  "clang 3.6.x miscompiles AVX code" # a preprocessor warning for users of Vc, irrelevant for the dashboard
  )

set(CTEST_CUSTOM_ERROR_EXCEPTION ${CTEST_CUSTOM_ERROR_EXCEPTION}
  "^ICECC"
  "^make\\[[1-9]\\]: "
  "^collect2: ld returned . exit status"
  "^make: \\*\\*\\* \\[.*\\] Error ")

@@ -0,0 +1,23 @@
Copyright © 2009-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

@@ -0,0 +1,27 @@
CXX ?= c++
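# Derive a per-compiler build directory: resolve $(CXX) to its real path
# (following an icecc wrapper to the underlying compiler if necessary) and turn
# that path, together with any CXXFLAGS, into a "build-..." directory name.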
build_dir := $(shell which $(CXX))
tmp := "case $$(readlink -f $(build_dir)) in *icecc) which $${ICECC_CXX:-g++};; *) echo $(build_dir);; esac"
build_dir := $(shell sh -c $(tmp))
build_dir := $(realpath $(build_dir))
build_dir := build-$(subst /,-,$(build_dir:/%=%)$(CXXFLAGS))

all:
%:: $(build_dir)/CMakeCache.txt
	$(MAKE) --no-print-directory -C "$(build_dir)" $(MAKECMDGOALS)

$(build_dir)/CMakeCache.txt:
	@test -n "$(build_dir)"
	@mkdir -p "$(build_dir)"
	@test -e "$(build_dir)/CMakeCache.txt" || cmake -H. -B"$(build_dir)"

print_build_dir:
	@echo "$(PWD)/$(build_dir)"

clean_builddir:
	rm -rf "$(build_dir)"

# the following rule works around %:: grabbing the Makefile rule and thus stops it from running every time
Makefile:
	@true

.PHONY: print_build_dir clean_builddir

@@ -0,0 +1,194 @@
**Vc is now in maintenance mode and no longer actively developed.
However, we continue to review pull requests with bugfixes from the community.**

**You may be interested in switching to [std-simd](https://github.com/VcDevel/std-simd).**
GCC 11 includes an experimental version of `std::simd` as part of libstdc++, which also works with clang.
Features present in Vc 1.4 and not present in *std-simd* will eventually turn into Vc 2.0, which then depends on *std-simd*.

# Vc: portable, zero-overhead C++ types for explicitly data-parallel programming

Recent generations of CPUs, and GPUs in particular, require data-parallel codes
for full efficiency. Data parallelism requires that the same sequence of
operations is applied to different input data. CPUs and GPUs can thus reduce
the necessary hardware for instruction decoding and scheduling in favor of more
arithmetic and logic units, which execute the same instructions synchronously.
On CPU architectures this is implemented via SIMD registers and instructions.
A single SIMD register can store N values and a single SIMD instruction can
execute N operations on those values. On GPU architectures N threads run in
perfect sync, fed by a single instruction decoder/scheduler. Each thread has
local memory and a given index to calculate the offsets in memory for loads and
stores.

Current C++ compilers can do automatic transformation of scalar codes to SIMD
instructions (auto-vectorization). However, the compiler must reconstruct an
intrinsic property of the algorithm that was lost when the developer wrote a
purely scalar implementation in C++. Consequently, C++ compilers cannot
vectorize any given code to its most efficient data-parallel variant.
Especially larger data-parallel loops, spanning over multiple functions or even
translation units, will often not be transformed into efficient SIMD code.

The Vc library provides the missing link. Its types enable explicitly stating
data-parallel operations on multiple values. The parallelism is therefore added
via the type system. Competing approaches state the parallelism via new control
structures and consequently new semantics inside the body of these control
structures.

Vc is a free software library to ease explicit vectorization of C++ code. It
has an intuitive API and provides portability between different compilers and
compiler versions as well as portability between different vector instruction
sets. Thus an application written with Vc can be compiled for:

* AVX and AVX2
* SSE2 up to SSE4.2 or SSE4a
* Scalar
* ~~AVX-512 (Vc 2 development)~~
* ~~NEON (in development)~~
* ~~NVIDIA GPUs / CUDA (research)~~

After Intel dropped MIC support with ICC 18, Vc 1.4 also removed support for it.

## Examples

### Usage on Compiler Explorer

* [Simdize Example](https://godbolt.org/z/JVEM2j)
* [Total momentum and time stepping of `std::vector<Particle>`](https://godbolt.org/z/JNdkL9)
* [Matrix Example](https://godbolt.org/z/fFEkuX): This uses vertical
  vectorization which does not scale to different vector sizes. However, the
  example is instructive to compare it with similar solutions of other languages
  or libraries.
* [N-vortex solver](https://godbolt.org/z/4o1cg_) showing `simdize`d iteration
  over many `std::vector<float>`. Note how [important the `-march` flag is, compared
  to plain `-mavx2 -mfma`](https://godbolt.org/z/hKiOjr).

### Scalar Product

Let's start from the code for calculating a 3D scalar product using builtin floats:
```cpp
using Vec3D = std::array<float, 3>;
float scalar_product(Vec3D a, Vec3D b) {
    return a[0] * b[0] + a[1] * b[1] + a[2] * b[2];
}
```
Using Vc, we can easily vectorize the code using the `float_v` type:
```cpp
using Vc::float_v;
using Vec3D = std::array<float_v, 3>;
float_v scalar_product(Vec3D a, Vec3D b) {
    return a[0] * b[0] + a[1] * b[1] + a[2] * b[2];
}
```
The above will scale to 1, 4, 8, 16, etc. scalar products calculated in parallel, depending
on the target hardware's capabilities.
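
Each `float_v` holds `float_v::size()` values, so one call to the vectorized
`scalar_product` computes that many scalar products at once when the inputs come
from structure-of-arrays data. A minimal sketch (the container layout and the
loop are assumptions for illustration; the `std::allocator` specializations
shipped in `Vc/Allocator` keep `std::vector<float_v>` correctly aligned):
```cpp
#include <Vc/Vc>
#include <cstddef>
#include <vector>

using Vc::float_v;

// ax[i], ay[i], az[i] hold the coordinates of float_v::size() vectors of the
// first operand; bx, by, bz those of the second. Each iteration produces
// float_v::size() scalar products.
void scalarProducts(const std::vector<float_v> &ax, const std::vector<float_v> &ay,
                    const std::vector<float_v> &az, const std::vector<float_v> &bx,
                    const std::vector<float_v> &by, const std::vector<float_v> &bz,
                    std::vector<float_v> &result)
{
    for (std::size_t i = 0; i < result.size(); ++i) {
        result[i] = ax[i] * bx[i] + ay[i] * by[i] + az[i] * bz[i];
    }
}
```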

For comparison, the same vectorization using Intel SSE intrinsics is more verbose and uses
prefix notation (i.e. function calls):
```cpp
using Vec3D = std::array<__m128, 3>;
__m128 scalar_product(Vec3D a, Vec3D b) {
    return _mm_add_ps(_mm_add_ps(_mm_mul_ps(a[0], b[0]), _mm_mul_ps(a[1], b[1])),
                      _mm_mul_ps(a[2], b[2]));
}
```
The above will neither scale to AVX, AVX-512, etc., nor is it portable to other SIMD ISAs.

## Build Requirements

cmake >= 3.0

C++11 Compiler:

* GCC >= 4.8.1
* clang >= 3.4
* ICC >= 18.0.5
* Visual Studio 2019 (64-bit target)


## Building and Installing Vc

* Clone Vc and initialize Vc's git submodules:

  ```sh
  git clone https://github.com/VcDevel/Vc.git
  cd Vc
  git submodule update --init
  ```

* Create a build directory:

  ```sh
  $ mkdir build
  $ cd build
  ```

* Configure with cmake and add relevant options:

  ```sh
  $ cmake ..
  ```

  Optionally, specify an installation directory:

  ```sh
  $ cmake -DCMAKE_INSTALL_PREFIX=/opt/Vc ..
  ```

  Optionally, include building the unit tests:

  ```sh
  $ cmake -DBUILD_TESTING=ON ..
  ```

  On Windows, if you have multiple versions of Visual Studio installed, you can select one:

  ```sh
  $ cmake -G "Visual Studio 16 2019" ..
  ```

  See `cmake --help` for a list of possible generators.


* Build and install:

  ```sh
  $ cmake --build . -j 16
  $ cmake --install . # may require permissions
  ```

  On Windows, you can also open `Vc.sln` in Visual Studio and build/install from the IDE.

## Documentation

The documentation is generated via [doxygen](http://doxygen.org). You can build
the documentation by running `doxygen` in the `doc` subdirectory.
Alternatively, you can find nightly builds of the documentation at:

* [1.4 branch](https://vcdevel.github.io/Vc-1.4/)
* [1.4.3 release](https://vcdevel.github.io/Vc-1.4.3/)
* [1.4.2 release](https://vcdevel.github.io/Vc-1.4.2/)
* [1.4.1 release](https://vcdevel.github.io/Vc-1.4.1/)
* [1.4.0 release](https://vcdevel.github.io/Vc-1.4.0/)
* [1.3 branch](https://vcdevel.github.io/Vc-1.3/)
* [1.3.0 release](https://vcdevel.github.io/Vc-1.3.0/)
* [1.2.0 release](https://vcdevel.github.io/Vc-1.2.0/)
* [1.1.0 release](https://vcdevel.github.io/Vc-1.1.0/)
* [0.7 branch](https://vcdevel.github.io/Vc-0.7/)

## Publications

* [M. Kretz, "Extending C++ for Explicit Data-Parallel Programming via SIMD
  Vector Types", Goethe University Frankfurt, Dissertation,
  2015.](http://publikationen.ub.uni-frankfurt.de/frontdoor/index/index/docId/38415)
* [M. Kretz and V. Lindenstruth, "Vc: A C++ library for explicit
  vectorization", Software: Practice and Experience,
  2011.](http://dx.doi.org/10.1002/spe.1149)
* [M. Kretz, "Efficient Use of Multi- and Many-Core Systems with Vectorization
  and Multithreading", University of Heidelberg,
  2009.](http://code.compeng.uni-frankfurt.de/attachments/13/Diplomarbeit.pdf)

[Work on integrating the functionality of Vc in the C++ standard library.](
https://github.com/VcDevel/Vc/wiki/ISO-Standardization-of-the-Vector-classes)

## License

Vc is released under the terms of the [3-clause BSD license](http://opensource.org/licenses/BSD-3-Clause).

@@ -0,0 +1,140 @@
#!/bin/sh -e
export PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games"
export LANG="en_US.UTF-8"
export LANGUAGE="en_US.UTF-8"
export LC_CTYPE="en_US.UTF-8"
export LC_NUMERIC="en_US.UTF-8"
export LC_TIME="en_US.UTF-8"
export LC_MESSAGES="en_US.UTF-8"
unset CFLAGS CXXFLAGS

cd "`dirname "$0"`"
test -z "$dashboard_model" && export dashboard_model=Experimental
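
# Run one ctest pass. If extra compiler flags are passed in $1, prepend the
# libstdc++ directory belonging to the selected compiler/flags to
# LD_LIBRARY_PATH so the test binaries find the matching runtime.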
runTest() {
    libpath="$LD_LIBRARY_PATH"
    test -n "$1" && libpath="$(dirname $(realpath $($CXX $1 -print-file-name=libstdc++.so)))${libpath:+:}${libpath}"
    LD_LIBRARY_PATH="$libpath" CFLAGS="$1" CXXFLAGS="$1" ctest -S test.cmake || true
}

tested_compilers="lsakdfjwowleqirjodfisj"

runAllTests() {
    # first make sure we don't test a compiler a second time
    id="`which $CXX`"
    id="`readlink -f $id`"
    echo "$id"|grep -qF "$tested_compilers" && return
    tested_compilers="$tested_compilers
$id"

    # alright run the ctest script
    runTest
    supports32Bit && runTest -m32 || true
    supportsx32 && runTest -mx32 || true
}

supports32Bit() {
    test `uname -m` = "x86_64" || return 1
    CXX=${CXX:-c++}
    cat > /tmp/m32test.cpp <<END
#include <algorithm>
#include <string>
#include <iostream>
#include <cerrno>
void foo(int x) { switch (x) { case 0x0A: break; case 0x0B: break; case 0x0C: break; case 0x0D: break; case 0x0E: break; } }
int main() { std::cout << "Hello World!\n"; return 0; }
END
    $CXX -m32 -o /tmp/m32test /tmp/m32test.cpp >/dev/null 2>&1 || return 1
    rm /tmp/m32test*
    return 0
}

supportsx32() {
    test `uname -m` = "x86_64" || return 1
    CXX=${CXX:-c++}
    cat > /tmp/mx32test.cpp <<END
#include <algorithm>
#include <string>
#include <iostream>
#include <cerrno>
void foo(int x) { switch (x) { case 0x0A: break; case 0x0B: break; case 0x0C: break; case 0x0D: break; case 0x0E: break; } }
int main() { std::cout << "Hello World!\n"; return 0; }
END
    $CXX -mx32 -o /tmp/mx32test /tmp/mx32test.cpp >/dev/null 2>&1 || return 1
    rm /tmp/mx32test*
    return 0
}

system_compilers() {
    cxxlist="`find /usr/bin/ /usr/local/bin/ -name '*++-[0-9]*'|grep -v -- -linux-gnu`"
    if test -z "$cxxlist"; then
        cxxlist="`find /usr/bin/ /usr/local/bin/ -name '*++'|grep -v -- -linux-gnu`"
    fi
    if test -z "$cxxlist"; then
        # default compiler
        runAllTests
    else
        for CXX in $cxxlist; do
            CC=`echo "$CXX"|sed 's/clang++/clang/;s/g++/gcc/'`
            if test -x "$CC" -a -x "$CXX"; then
                export CC
                export CXX
                runAllTests
            fi
        done
    fi
}

modules_compilers() {
    if test -r /etc/profile.d/modules.sh; then
        source /etc/profile.d/modules.sh
        for mod in `module avail -t 2>&1`; do
            case `echo $mod|tr '[:upper:]' '[:lower:]'` in
                *intel*|*icc*) export CC=icc CXX=icpc;;
                *gnu*|*gcc*) export CC=gcc CXX=g++;;
                *llvm*|*clang*) export CC=clang CXX=clang++;;
                *) continue;;
            esac
            module load $mod
            runAllTests
            module unload $mod
        done
    fi
}

gccbuild_compilers() {
    for VcEnv in `find /opt/ -mindepth 2 -maxdepth 2 -name Vc.env`; do (
        . "$VcEnv"
        case "$VcEnv" in
            *-snapshot/Vc.env)
                ( cd $HOME/src/gcc-build && ./update.sh "`dirname "$VcEnv"`" )
                ;;
        esac
        runAllTests
    ) done
}

icc_compilers() {
    test -d /opt/intel || return
    export CC=icc
    export CXX=icpc
    icclist="`find /opt/intel/compiler* -name 'iccvars.sh' | xargs readlink -e | sort -ur`"
    case `uname -m` in
        x86_64)
            COMPILERVARS_ARCHITECTURE=intel64
            ;;
        i[345678]86)
            COMPILERVARS_ARCHITECTURE=ia32
            ;;
    esac
    export COMPILERVARS_ARCHITECTURE
    test -n "$icclist" && for IccEnv in $icclist; do (
        . $IccEnv $COMPILERVARS_ARCHITECTURE
        runAllTests
    ) done
}

system_compilers
modules_compilers
gccbuild_compilers
icc_compilers

@@ -0,0 +1,22 @@
#!/bin/bash

case "$1" in
    Experimental|Nightly|Continuous)
        export dashboard_model=$1
        case "$2" in
            None|Debug|Release|RelWithDebug|RelWithDebInfo|MinSizeRel)
                export build_type=$2
                ;;
        esac
        ;;
    *)
        echo "Usage: $0 <model> [<build type>]"
        echo
        echo "Possible arguments for model are Nightly, Continuous, or Experimental."
        echo "Build type may be one of: None Debug Release RelWithDebug RelWithDebInfo MinSizeRel."
        echo
        exit 1
        ;;
esac

ctest -S "`dirname $0`/test.cmake"

@@ -0,0 +1,284 @@
/*  This file is part of the Vc library. {{{
Copyright © 2014 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_ALLOCATOR_H_
#define VC_ALLOCATOR_H_

#include <new>
#include <cstddef>
#include <cstdlib>
#include <utility>

#include "global.h"
#include "common/macros.h"

/**
 * \ingroup Utilities
 *
 * Convenience macro to set the default allocator for a given \p Type to
 * Vc::Allocator.
 *
 * \param Type Your type that you want to use with STL containers.
 *
 * \note You have to use this macro in the global namespace.
 */
#ifdef Vc_MSVC
#define Vc_DECLARE_ALLOCATOR(Type)                                                     \
namespace std                                                                          \
{                                                                                      \
template <> class allocator<Type> : public ::Vc::Allocator<Type>                       \
{                                                                                      \
public:                                                                                \
    template <typename U> struct rebind {                                              \
        typedef ::std::allocator<U> other;                                             \
    };                                                                                 \
    /* MSVC brokenness: the following function is optional - just doesn't compile     \
     * without it */                                                                   \
    const allocator &select_on_container_copy_construction() const { return *this; }  \
};                                                                                     \
}
#else
#define Vc_DECLARE_ALLOCATOR(Type)                                                     \
namespace std                                                                          \
{                                                                                      \
template <> class allocator<Type> : public ::Vc::Allocator<Type>                       \
{                                                                                      \
public:                                                                                \
    template <typename U> struct rebind {                                              \
        typedef ::std::allocator<U> other;                                             \
    };                                                                                 \
};                                                                                     \
}
#endif

namespace Vc_VERSIONED_NAMESPACE
{
using std::size_t;
using std::ptrdiff_t;

/**
 * \headerfile Allocator <Vc/Allocator>
 * An allocator that uses global new and supports over-aligned types, as per [C++11 20.6.9].
 *
 * Meant as a simple replacement for the allocator defined in the C++ Standard.
 * Allocation is done using the global new/delete operators. But if the alignment property of \p
 * T is larger than the size of a pointer, the allocate function allocates slightly more memory
 * to adjust the pointer for correct alignment.
 *
 * If the \p T does not require over-alignment no additional memory will be allocated.
 *
 * \tparam T The type of objects to allocate.
 *
 * Example:
 * \code
 * struct Data {
 *     Vc::float_v x, y, z;
 * };
 *
 * void fun()
 * {
 *     std::vector<Data> dat0; // this will use std::allocator<Data>, which probably ignores the
 *                             // alignment requirements for Data. Thus any access to dat0 may
 *                             // crash your program.
 *
 *     std::vector<Data, Vc::Allocator<Data> > dat1; // now std::vector will get correctly aligned
 *                             // memory. Accesses to dat1 are safe.
 *     ...
 * \endcode
 *
 * %Vc ships a macro to conveniently tell STL to use Vc::Allocator per default for a given type:
 * \code
 * struct Data {
 *     Vc::float_v x, y, z;
 * };
 * Vc_DECLARE_ALLOCATOR(Data)
 *
 * void fun()
 * {
 *     std::vector<Data> dat0; // good now
 *     ...
 * \endcode
 *
 * \ingroup Utilities
 */
template<typename T> class Allocator
{
private:
    enum Constants {
#ifdef Vc_HAVE_STD_MAX_ALIGN_T
        NaturalAlignment = alignof(std::max_align_t),
#elif defined(Vc_HAVE_MAX_ALIGN_T)
        NaturalAlignment = alignof(::max_align_t),
#else
        NaturalAlignment = sizeof(void *) > alignof(long double) ? sizeof(void *) :
            (alignof(long double) > alignof(long long) ? alignof(long double) : alignof(long long)),
#endif
#if defined Vc_IMPL_AVX
        SimdAlignment = 32,
#elif defined Vc_IMPL_SSE
        SimdAlignment = 16,
#else
        SimdAlignment = 1,
#endif
        Alignment = alignof(T) > SimdAlignment ? alignof(T) : SimdAlignment,
        /* The number of extra bytes allocated must be large enough to put a pointer right
         * before the adjusted address. This pointer stores the original address, which is
         * required to call ::operator delete in deallocate.
         *
         * The address we get from ::operator new is a multiple of NaturalAlignment:
         *   p = N * NaturalAlignment
         *
         * Since all alignments are powers of two, Alignment is a multiple of NaturalAlignment:
         *   Alignment = k * NaturalAlignment
         *
         * two cases:
         * 1. If p is already aligned to Alignment then allocate will return p + Alignment. In
         *    this case there are Alignment Bytes available to store a pointer.
         * 2. If p is not aligned then p + (k - (N modulo k)) * NaturalAlignment will be
         *    returned. Since NaturalAlignment >= sizeof(void*) the pointer fits.
         */
        ExtraBytes = Alignment > NaturalAlignment ? Alignment : 0,
        AlignmentMask = Alignment - 1
    };
public:
    typedef size_t    size_type;
    typedef ptrdiff_t difference_type;
    typedef T*        pointer;
    typedef const T*  const_pointer;
    typedef T&        reference;
    typedef const T&  const_reference;
    typedef T         value_type;

    template<typename U> struct rebind { typedef Allocator<U> other; };

    Allocator() throw() { }
    Allocator(const Allocator&) throw() { }
    template<typename U> Allocator(const Allocator<U>&) throw() { }

    pointer address(reference x) const { return &x; }
    const_pointer address(const_reference x) const { return &x; }

    pointer allocate(size_type n, const void* = 0)
    {
        if (n > this->max_size()) {
            throw std::bad_alloc();
        }

        char *p = static_cast<char *>(::operator new(n * sizeof(T) + ExtraBytes));
        if (ExtraBytes > 0) {
            char *const pp = p;
            p += ExtraBytes;
            const char *null = 0;
            p -= ((p - null) & AlignmentMask); // equivalent to p &= ~AlignmentMask;
            reinterpret_cast<char **>(p)[-1] = pp;
        }
        return reinterpret_cast<pointer>(p);
    }

    void deallocate(pointer p, size_type)
    {
        if (ExtraBytes > 0) {
            p = reinterpret_cast<pointer *>(p)[-1];
        }
        ::operator delete(p);
    }

    size_type max_size() const throw() { return size_t(-1) / sizeof(T); }

#ifdef Vc_MSVC
    // MSVC brokenness: the following function is optional - just doesn't compile without it
    const Allocator &select_on_container_copy_construction() const { return *this; }

    // MSVC also requires a function that neither C++98 nor C++11 mention
    // but it doesn't support variadic templates... otherwise the Vc_CXX11 clause would be nice
    void construct(pointer p) { ::new(p) T(); }

    // we still need the C++98 version:
    void construct(pointer p, const T& val) { ::new(p) T(val); }
    void destroy(pointer p) { p->~T(); }
#else
    template<typename U, typename... Args> void construct(U* p, Args&&... args)
    {
        ::new(p) U(std::forward<Args>(args)...);
    }
    template<typename U> void destroy(U* p) { p->~U(); }
#endif
};

template<typename T> inline bool operator==(const Allocator<T>&, const Allocator<T>&) { return true; }
template<typename T> inline bool operator!=(const Allocator<T>&, const Allocator<T>&) { return false; }

}

#include "vector.h"
namespace std
{
template<typename T, typename Abi>
class allocator<Vc::Vector<T, Abi> > : public ::Vc::Allocator<Vc::Vector<T, Abi> >
{
public:
    template<typename U> struct rebind { typedef ::std::allocator<U> other; };
#ifdef Vc_MSVC
    // MSVC brokenness: the following function is optional - just doesn't compile without it
    const allocator &select_on_container_copy_construction() const { return *this; }
#endif
};
template <typename T, typename Abi>
class allocator<Vc::Mask<T, Abi>> : public ::Vc::Allocator<Vc::Mask<T, Abi>>
{
public:
    template<typename U> struct rebind { typedef ::std::allocator<U> other; };
#ifdef Vc_MSVC
    // MSVC brokenness: the following function is optional - just doesn't compile without it
    const allocator &select_on_container_copy_construction() const { return *this; }
#endif
};
template <typename T, std::size_t N, typename V, std::size_t M>
class allocator<Vc::SimdArray<T, N, V, M>> : public ::Vc::Allocator<Vc::SimdArray<T, N, V, M>>
{
public:
    template<typename U> struct rebind { typedef ::std::allocator<U> other; };
#ifdef Vc_MSVC
    // MSVC brokenness: the following function is optional - just doesn't compile without it
    const allocator &select_on_container_copy_construction() const { return *this; }
#endif
};
template <typename T, std::size_t N, typename V, std::size_t M>
class allocator<Vc::SimdMaskArray<T, N, V, M>> : public ::Vc::Allocator<Vc::SimdMaskArray<T, N, V, M>>
{
public:
    template<typename U> struct rebind { typedef ::std::allocator<U> other; };
#ifdef Vc_MSVC
    // MSVC brokenness: the following function is optional - just doesn't compile without it
    const allocator &select_on_container_copy_construction() const { return *this; }
#endif
};
}

#endif // VC_ALLOCATOR_H_

// vim: ft=cpp et sw=4 sts=4
@ -0,0 +1,268 @@
/* This file is part of the Vc library. {{{
Copyright © 2009-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_IO_
#define VC_IO_

#include "common/types.h"
#include "common/simdarrayfwd.h"
#include "common/memoryfwd.h"
#include <iostream>

#if defined(__GNUC__) && !defined(_WIN32) && defined(_GLIBCXX_OSTREAM)
#define Vc_HACK_OSTREAM_FOR_TTY 1
#endif

#ifdef Vc_HACK_OSTREAM_FOR_TTY
#include <unistd.h>
#include <ext/stdio_sync_filebuf.h>
#endif

namespace Vc_VERSIONED_NAMESPACE
{
namespace
{
#ifdef Vc_HACK_OSTREAM_FOR_TTY
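// This exposes the protected _M_streambuf member of std::ostream (libstdc++
// specific) so that mayUseColor() below can recover the underlying FILE* and
// ask isatty() whether colorized output is appropriate.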
class hacked_ostream : public std::ostream
{
public:
    using std::ostream::_M_streambuf;
};
bool mayUseColor(const std::ostream &os) __attribute__((__const__));
bool mayUseColor(const std::ostream &os)
{
    std::basic_streambuf<char> *hack1 =
        const_cast<std::basic_streambuf<char> *>(os.*(&hacked_ostream::_M_streambuf));
    __gnu_cxx::stdio_sync_filebuf<char> *hack =
        dynamic_cast<__gnu_cxx::stdio_sync_filebuf<char> *>(hack1);
    if (!hack) {
        return false;
    }
    FILE *file = hack->file();
    return 1 == isatty(fileno(file));
}
#else
bool mayUseColor(const std::ostream &) { return false; }
#endif
}  // anonymous namespace

namespace AnsiColor
{
struct Type
{
    const char *data;
};
static const Type green  = {"\033[1;40;32m"};
static const Type yellow = {"\033[1;40;33m"};
static const Type blue   = {"\033[1;40;34m"};
static const Type normal = {"\033[0m"};

inline std::ostream &operator<<(std::ostream &out, const Type &c)
{
    if (mayUseColor(out)) {
        out << c.data;
    }
    return out;
}
}  // namespace AnsiColor
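
/* Usage sketch (illustration only): the Type values act as stream manipulators
 * that emit their ANSI escape code only when the stream is backed by a tty:
 * \code
 * std::cout << AnsiColor::green << "ok" << AnsiColor::normal << '\n';
 * \endcode
 */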

/**
 * \ingroup Vectors
 * \headerfile IO <Vc/IO>
 *
 * Prints the contents of a vector into a stream object.
 *
 * \code
 * const Vc::int_v v(Vc::IndexesFromZero);
 * std::cout << v << std::endl;
 * \endcode
 * will output (with SSE):
\verbatim
[0, 1, 2, 3]
\endverbatim
 *
 * \param out Any standard C++ ostream object. For example std::cout or a
 *            std::stringstream object.
 * \param v Any Vc::Vector object.
 * \return The ostream object: to chain multiple stream operations.
 *
 * \note With the GNU standard library this function will check whether the
 *       output stream is a tty in which case it colorizes the output.
 */
template <typename T, typename Abi>
inline std::ostream &operator<<(std::ostream &out, const Vc::Vector<T, Abi> &v)
{
    using TT = typename std::conditional<std::is_same<T, char>::value ||
                                             std::is_same<T, unsigned char>::value ||
                                             std::is_same<T, signed char>::value,
                                         int,
                                         T>::type;
    out << AnsiColor::green << '[';
    out << TT(v[0]);
    for (size_t i = 1; i < v.Size; ++i) {
        out << ", " << TT(v[i]);
    }
    out << ']' << AnsiColor::normal;
    return out;
}

/**
 * \ingroup Masks
 * \headerfile IO <Vc/IO>
 *
 * Prints the contents of a mask into a stream object.
 *
 * \code
 * const Vc::short_m m = Vc::short_v::IndexesFromZero() < 3;
 * std::cout << m << std::endl;
 * \endcode
 * will output (with SSE):
\verbatim
m[1110 0000]
\endverbatim
 *
 * \param out Any standard C++ ostream object. For example std::cout or a
 *            std::stringstream object.
 * \param m Any Vc::Mask object.
 * \return The ostream object: to chain multiple stream operations.
 *
 * \note With the GNU standard library this function will check whether the
 *       output stream is a tty in which case it colorizes the output.
 */
template <typename T, typename Abi>
inline std::ostream &operator<<(std::ostream &out, const Vc::Mask<T, Abi> &m)
{
    out << AnsiColor::blue << "m[";
    for (unsigned int i = 0; i < m.Size; ++i) {
        if (i > 0 && (i % 4) == 0) {
            out << ' ';
        }
        if (m[i]) {
            out << AnsiColor::yellow << '1';
        } else {
            out << AnsiColor::blue << '0';
        }
    }
    out << AnsiColor::blue << ']' << AnsiColor::normal;
    return out;
}

namespace Common
{
#ifdef DOXYGEN
/**
 * \ingroup Utilities
 * \headerfile dox.h <Vc/IO>
 *
 * Prints the contents of a Memory object into a stream object.
 *
 * \code
 * Vc::Memory<int_v, 10> m;
 * for (int i = 0; i < m.entriesCount(); ++i) {
 *     m[i] = i;
 * }
 * std::cout << m << std::endl;
 * \endcode
 * will output (with SSE):
\verbatim
{[0, 1, 2, 3] [4, 5, 6, 7] [8, 9, 0, 0]}
\endverbatim
 *
 * \param s Any standard C++ ostream object. For example std::cout or a std::stringstream object.
 * \param m Any Vc::Memory object.
 * \return The ostream object: to chain multiple stream operations.
 *
 * \note With the GNU standard library this function will check whether the
 *       output stream is a tty in which case it colorizes the output.
 *
 * \warning Please do not forget that printing a large memory object can take a long time.
 */
template<typename V, typename Parent, typename Dimension, typename RM>
inline std::ostream &operator<<(std::ostream &s, const Vc::MemoryBase<V, Parent, Dimension, RM> &m);
#endif

template<typename V, typename Parent, typename RM>
inline std::ostream &operator<<(std::ostream &out, const MemoryBase<V, Parent, 1, RM> &m)
{
    out << AnsiColor::blue << '{' << AnsiColor::normal;
    for (unsigned int i = 0; i < m.vectorsCount(); ++i) {
        out << V(m.vector(i));
    }
    out << AnsiColor::blue << '}' << AnsiColor::normal;
    return out;
}

template<typename V, typename Parent, typename RM>
inline std::ostream &operator<<(std::ostream &out, const MemoryBase<V, Parent, 2, RM> &m)
{
    out << AnsiColor::blue << '{' << AnsiColor::normal;
    for (size_t i = 0; i < m.rowsCount(); ++i) {
        if (i > 0) {
            out << "\n ";
        }
        const size_t vcount = m[i].vectorsCount();
        for (size_t j = 0; j < vcount; ++j) {
            out << V(m[i].vector(j));
        }
    }
    out << AnsiColor::blue << '}' << AnsiColor::normal;
    return out;
}
}  // namespace Common

template<typename T, std::size_t N>
inline std::ostream &operator<<(std::ostream &out, const SimdArray<T, N> &v)
{
    out << AnsiColor::green << '<' << v[0];
    for (size_t i = 1; i < N; ++i) {
        if (i % 4 == 0) out << " |";
        out << ' ' << v[i];
    }
    return out << '>' << AnsiColor::normal;
}

template<typename T, std::size_t N>
inline std::ostream &operator<<(std::ostream &out, const SimdMaskArray<T, N> &m)
{
    out << AnsiColor::blue << "«";
    for (size_t i = 0; i < N; ++i) {
        if (i > 0 && (i % 4) == 0) {
            out << ' ';
        }
        if (m[i]) {
            out << AnsiColor::yellow << '1';
        } else {
            out << AnsiColor::blue << '0';
        }
    }
    return out << AnsiColor::blue << "»" << AnsiColor::normal;
}
}  // namespace Vc

#endif // VC_IO_

// vim: ft=cpp foldmethod=marker
@ -0,0 +1,43 @@
/* This file is part of the Vc library. {{{
Copyright © 2009-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_MEMORY_
#define VC_MEMORY_

#include "vector.h"
#include "common/memory.h"
#include "common/interleavedmemory.h"

#include "common/make_unique.h"
namespace Vc_VERSIONED_NAMESPACE
{
using Common::make_unique;
}

#endif // VC_MEMORY_

// vim: ft=cpp foldmethod=marker
@ -0,0 +1,35 @@
/* This file is part of the Vc library. {{{
Copyright © 2013-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_SIMDARRAY_
#define VC_SIMDARRAY_

#include "common/simdarray.h"

#endif // VC_SIMDARRAY_

// vim: ft=cpp foldmethod=marker
@ -0,0 +1,44 @@
/* This file is part of the Vc library. {{{
Copyright © 2010-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_UTILS_
#define VC_UTILS_

#include "global.h"

#ifdef Vc_IMPL_Scalar
# define VECTOR_NAMESPACE Scalar
#else
# define VECTOR_NAMESPACE SSE
#endif

#include "common/deinterleave.h"
#include "common/makeContainer.h"

#endif // VC_UTILS_

// vim: ft=cpp foldmethod=marker
@ -0,0 +1,43 @@
/* This file is part of the Vc library. {{{
Copyright © 2009-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_VC_
#define VC_VC_
#include "vector.h"
#include "IO"
#include "Memory"
#include "Utils"
#include "Allocator"
#include "algorithm"
#include "iterators"
#include "simdize"
#include "array"
#include "span"
#include "vector"
#endif // VC_VC_

// vim: ft=cpp foldmethod=marker
@ -0,0 +1 @@
#include "common/algorithms.h"
@ -0,0 +1,315 @@
/* This file is part of the Vc library. {{{
Copyright © 2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/
//===---------------------------- array -----------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#ifndef VC_INCLUDE_VC_ARRAY_
#define VC_INCLUDE_VC_ARRAY_

#include <type_traits>
#include <utility>
#include <iterator>
#include <algorithm>
#include <stdexcept>

#include "common/subscript.h"

namespace Vc_VERSIONED_NAMESPACE
{
/**
 * \ingroup Containers
 * This is `std::array` with additional subscript operators supporting gather and scatter operations.
 *
 * The [std::array](https://en.cppreference.com/w/cpp/container/array) documentation applies.
 *
 * Gathers from structured data (AoS: arrays of struct) are possible via a special
 * subscript operator.
 * Example:
 * \code
 * Vc::array<float, 100> data;
 * std::iota(data.begin(), data.end(), 0.f);  // fill with values 0, 1, 2, ...
 * auto indexes = float_v::IndexType::IndexesFromZero();
 * float_v gathered = data[indexes];  // gathered == [0, 1, 2, ...]
 * \endcode
 *
 * This also works for gathers into arrays of structures:
 * \code
 * struct Point { float x, y, z; };
 * Vc::array<Point, 100> points;
 * // fill points ...
 * auto indexes = float_v::IndexType::IndexesFromZero();
 * float_v xs = points[indexes][&Point::x];  // [points[0].x, points[1].x, points[2].x, ...]
 * float_v ys = points[indexes][&Point::y];  // [points[0].y, points[1].y, points[2].y, ...]
 * float_v zs = points[indexes][&Point::z];  // [points[0].z, points[1].z, points[2].z, ...]
 * \endcode
 *
 * Arrays may also be nested:
 * \code
 * Vc::array<Vc::array<float, 3>, 100> points;
 * // fill points ...
 * auto indexes = float_v::IndexType::IndexesFromZero();
 * float_v xs = points[indexes][0];  // [points[0][0], points[1][0], points[2][0], ...]
 * float_v ys = points[indexes][1];  // [points[0][1], points[1][1], points[2][1], ...]
 * float_v zs = points[indexes][2];  // [points[0][2], points[1][2], points[2][2], ...]
 * \endcode
 */
template <class T, size_t Size> struct array {
    // types:
    typedef array self_;
    typedef T value_type;
    typedef value_type& reference;
    typedef const value_type& const_reference;
    typedef value_type* iterator;
    typedef const value_type* const_iterator;
    typedef value_type* pointer;
    typedef const value_type* const_pointer;
    typedef size_t size_type;
    typedef ptrdiff_t difference_type;
    typedef std::reverse_iterator<iterator> reverse_iterator;
    typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
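    // Storage. C++ does not allow zero-length arrays, so Size == 0 still
    // reserves one element; this mirrors the libc++ std::array this file is
    // derived from.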
    value_type elems_[Size > 0 ? Size : 1];

    // No explicit construct/copy/destroy for aggregate type
    void fill(const value_type& u_) { std::fill_n(elems_, Size, u_); }
    void swap(array& a_) noexcept(
        noexcept(std::swap(std::declval<T &>(), std::declval<T &>())))
    {
        std::swap_ranges(elems_, elems_ + Size, a_.elems_);
    }

    // iterators:
    iterator begin() noexcept { return iterator(elems_); }
    const_iterator begin() const noexcept { return const_iterator(elems_); }
    iterator end() noexcept { return iterator(elems_ + Size); }
    const_iterator end() const noexcept { return const_iterator(elems_ + Size); }
    reverse_iterator rbegin() noexcept { return reverse_iterator(end()); }
    const_reverse_iterator rbegin() const noexcept
    {
        return const_reverse_iterator(end());
    }
    reverse_iterator rend() noexcept { return reverse_iterator(begin()); }
    const_reverse_iterator rend() const noexcept
    {
        return const_reverse_iterator(begin());
    }

    const_iterator cbegin() const noexcept { return begin(); }
    const_iterator cend() const noexcept { return end(); }
    const_reverse_iterator crbegin() const noexcept { return rbegin(); }
    const_reverse_iterator crend() const noexcept { return rend(); }
    // capacity:
    constexpr size_type size() const noexcept { return Size; }
    constexpr size_type max_size() const noexcept { return Size; }
    constexpr bool empty() const noexcept { return Size == 0; }
    // element access:
    reference operator[](size_type n_) { return elems_[n_]; }
    constexpr const_reference operator[](size_type n_) const { return elems_[n_]; }

    /**
     * \name Data-Parallel Subscripting for Gather & Scatter
     */
    ///@{
    template <typename I>
    Vc_ALWAYS_INLINE auto operator[](I&& arg_)
        -> decltype(subscript_operator(*this, std::forward<I>(arg_)))
    {
        return subscript_operator(*this, std::forward<I>(arg_));
    }

    template <typename I>
    Vc_ALWAYS_INLINE auto operator[](I&& arg_) const
        -> decltype(subscript_operator(*this, std::forward<I>(arg_)))
    {
        return subscript_operator(*this, std::forward<I>(arg_));
    }
    ///@}

    reference at(size_type n_);
    constexpr const_reference at(size_type n_) const;

    reference front() { return elems_[0]; }
    constexpr const_reference front() const { return elems_[0]; }
    reference back() { return elems_[Size > 0 ? Size - 1 : 0]; }
    constexpr const_reference back() const { return elems_[Size > 0 ? Size - 1 : 0]; }
    value_type* data() noexcept { return elems_; }
    const value_type* data() const noexcept { return elems_; }
};

template <class T, size_t Size>
typename array<T, Size>::reference array<T, Size>::at(size_type n_)
{
    if (n_ >= Size) {
        throw std::out_of_range("array::at");
    }
    return elems_[n_];
}

template <class T, size_t Size>
constexpr typename array<T, Size>::const_reference array<T, Size>::at(size_type n_) const
{
    return n_ >= Size ? (throw std::out_of_range("array::at"), elems_[0]) : elems_[n_];
}

template <class T, size_t Size>
inline bool operator==(const array<T, Size>& x_, const array<T, Size>& y_)
{
    return std::equal(x_.elems_, x_.elems_ + Size, y_.elems_);
}

template <class T, size_t Size>
inline bool operator!=(const array<T, Size>& x_, const array<T, Size>& y_)
{
    return !(x_ == y_);
}

template <class T, size_t Size>
inline bool operator<(const array<T, Size>& x_, const array<T, Size>& y_)
{
    return std::lexicographical_compare(x_.elems_, x_.elems_ + Size, y_.elems_,
                                        y_.elems_ + Size);
}

template <class T, size_t Size>
inline bool operator>(const array<T, Size>& x_, const array<T, Size>& y_)
{
    return y_ < x_;
}

template <class T, size_t Size>
inline bool operator<=(const array<T, Size>& x_, const array<T, Size>& y_)
{
    return !(y_ < x_);
}

template <class T, size_t Size>
inline bool operator>=(const array<T, Size>& x_, const array<T, Size>& y_)
{
    return !(x_ < y_);
}

/**\name non-member begin & end
 * Implement the non-member begin & end functions in the %Vc namespace so that ADL works
 * and `begin(some_vc_array)` always works.
 */
///@{
template <typename T, std::size_t N>
inline auto begin(array<T, N>& arr) -> decltype(arr.begin())
{
    return arr.begin();
}
template <typename T, std::size_t N>
inline auto begin(const array<T, N>& arr) -> decltype(arr.begin())
{
    return arr.begin();
}
template <typename T, std::size_t N>
inline auto end(array<T, N>& arr) -> decltype(arr.end())
{
    return arr.end();
}
template <typename T, std::size_t N>
inline auto end(const array<T, N>& arr) -> decltype(arr.end())
{
    return arr.end();
}
///@}

namespace Traits
{
template <typename T, std::size_t N>
struct has_no_allocated_data_impl<Vc::array<T, N>> : public std::true_type
{
};
template <typename T, std::size_t N>
struct has_contiguous_storage_impl<Vc::array<T, N>> : public std::true_type
{
};
}  // namespace Traits
}  // namespace Vc

namespace std
{
template <class T, size_t Size>
inline
#ifdef Vc_MSVC
    // MSVC fails to do SFINAE correctly and gets totally confused:
    // error C2433: 'type': 'inline' not permitted on data declarations
    // error C4430: missing type specifier - int assumed. Note: C++ does not support default-int
    // error C2061: syntax error: identifier 'swap'
    void
#else
    typename enable_if<is_same<void, decltype(swap(declval<T&>(), declval<T&>()))>::value,
                       void>::type
#endif
    swap(Vc::array<T, Size>& x_,
         Vc::array<T, Size>& y_) noexcept(noexcept(swap(declval<T&>(), declval<T&>())))
{
    x_.swap(y_);
}

template <class T, size_t Size>
class tuple_size<Vc::array<T, Size>> : public integral_constant<size_t, Size>
{
};

template <size_t I, class T, size_t Size> class tuple_element<I, Vc::array<T, Size>>
{
public:
    typedef T type;
};

template <size_t I, class T, size_t Size>
inline constexpr typename std::enable_if<(I < Size), T&>::type get(
    Vc::array<T, Size>& a_) noexcept
{
    return a_.elems_[I];
}

template <size_t I, class T, size_t Size>
inline constexpr typename std::enable_if<(I < Size), const T&>::type get(
    const Vc::array<T, Size>& a_) noexcept
{
    return a_.elems_[I];
}

template <size_t I, class T, size_t Size>
inline constexpr typename std::enable_if<(I < Size), T&&>::type get(
    Vc::array<T, Size>&& a_) noexcept
{
    return std::move(a_.elems_[I]);
}
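
// Illustration (not part of the upstream header): the tuple interface above
// makes std::get work on Vc::array, e.g.
//   Vc::array<float, 3> a = {{1.f, 2.f, 3.f}};
//   float &x = std::get<0>(a);  // index checked at compile time via enable_if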
} // namespace std

#endif // VC_INCLUDE_VC_ARRAY_

// vim: ft=cpp foldmethod=marker
@ -0,0 +1,58 @@
###########################################
#################  AVX  #################
###########################################


1. Floating Point
===========================================
Uses full 256bit vectors for all operations. 128bit vectors are never used.


2. Integer
===========================================
Integer support in AVX is minimal.
The 256bit integer vectors are just intended as a supporting type of float operations.

Any arithmetic, logical, or comparison operations must be implemented using 128bit operations.

int_v/uint_v could be implemented either as 128bit or 256bit types, i.e. either int_v::Size == 4
or int_v::Size == 8.


2.1. 256bit int vectors
===========================================

2.1.1. Implementation Details:
This requires the SSE operations to not zero the high bits of the registers. Since the YMM registers
are aliased on the XMM registers, you would need to use SSE ops that do not use the VEX prefix (IIUC).
Or you have to use two XMM registers most of the time.
Perfect would be the use of
union M256I {
    __m256i ymm;
    __m128i xmm[2];
};
But, as far as I know GCC, this will result in lots of unnecessary loads and stores. (It seems this is
due to GCC expecting aliasing, thus making sure the modified values are always up-to-date in memory
- like if it were declared volatile.)
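(A sketch of the two-XMM-register alternative, added here for illustration: AVX provides
intrinsics to split and rejoin the halves without a round trip through memory.)

    // split a 256bit integer vector into its two 128bit halves
    __m128i lo = _mm256_castsi256_si128(v);        // no instruction, just register aliasing
    __m128i hi = _mm256_extractf128_si256(v, 1);   // vextractf128
    // ... operate on lo/hi with 128bit integer ops ...
    __m256i r = _mm256_insertf128_si256(_mm256_castsi128_si256(lo), hi, 1);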

2.1.2. Upsides:
int_v::Size == float_v::Size

2.1.3. Downsides:
Register pressure is increased.

2.2. 128bit int vectors
===========================================

2.2.1. Implementation Details:

2.2.2. Upsides:

2.2.3. Downsides:
- Use of int_v for float_v operations involving __m256i arguments requires an extra type. This will
  be hard to generalize


2.3. Mixed approach
===========================================
int_v/uint_v are implemented as 256bit while short_v/ushort_v are implemented as 128bit. Thus
int_v::Size == short_v::Size (which is the case on LRBni, too).
@ -0,0 +1,305 @@
/* This file is part of the Vc library. {{{
Copyright © 2009-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_AVX_CASTS_H_
#define VC_AVX_CASTS_H_

#include "intrinsics.h"
#include "types.h"
#include "../sse/casts.h"
#include "shuffle.h"
#include "macros.h"

namespace Vc_VERSIONED_NAMESPACE
{
namespace AVX
{
namespace Casts
{
template<typename T> Vc_INTRINSIC_L T avx_cast(__m128  v) Vc_INTRINSIC_R;
template<typename T> Vc_INTRINSIC_L T avx_cast(__m128i v) Vc_INTRINSIC_R;
template<typename T> Vc_INTRINSIC_L T avx_cast(__m128d v) Vc_INTRINSIC_R;
template<typename T> Vc_INTRINSIC_L T avx_cast(__m256  v) Vc_INTRINSIC_R;
template<typename T> Vc_INTRINSIC_L T avx_cast(__m256i v) Vc_INTRINSIC_R;
template<typename T> Vc_INTRINSIC_L T avx_cast(__m256d v) Vc_INTRINSIC_R;

// 128 -> 128
template<> Vc_INTRINSIC __m128  avx_cast(__m128  v) { return v; }
template<> Vc_INTRINSIC __m128  avx_cast(__m128i v) { return _mm_castsi128_ps(v); }
template<> Vc_INTRINSIC __m128  avx_cast(__m128d v) { return _mm_castpd_ps(v); }
template<> Vc_INTRINSIC __m128i avx_cast(__m128  v) { return _mm_castps_si128(v); }
template<> Vc_INTRINSIC __m128i avx_cast(__m128i v) { return v; }
template<> Vc_INTRINSIC __m128i avx_cast(__m128d v) { return _mm_castpd_si128(v); }
template<> Vc_INTRINSIC __m128d avx_cast(__m128  v) { return _mm_castps_pd(v); }
template<> Vc_INTRINSIC __m128d avx_cast(__m128i v) { return _mm_castsi128_pd(v); }
template<> Vc_INTRINSIC __m128d avx_cast(__m128d v) { return v; }

// 128 -> 256
// FIXME: the following casts leave the upper 128bits undefined. With GCC and ICC I've never
// seen the cast not do what I want though: after a VEX-coded SSE instruction the register's
// upper 128bits are zero. Thus using the same register as AVX register will have the upper
// 128bits zeroed. MSVC, though, implements _mm256_castxx128_xx256 with a 128bit move to memory
// + 256bit load. Thus the upper 128bits are really undefined. But there is no intrinsic to do
// what I want (i.e. alias the register, disallowing the move to memory in-between). I'm stuck,
// do we really want to rely on specific compiler behavior here?
template<> Vc_INTRINSIC __m256  avx_cast(__m128  v) { return _mm256_castps128_ps256(v); }
template<> Vc_INTRINSIC __m256  avx_cast(__m128i v) { return _mm256_castps128_ps256(_mm_castsi128_ps(v)); }
template<> Vc_INTRINSIC __m256  avx_cast(__m128d v) { return _mm256_castps128_ps256(_mm_castpd_ps(v)); }
template<> Vc_INTRINSIC __m256i avx_cast(__m128  v) { return _mm256_castsi128_si256(_mm_castps_si128(v)); }
template<> Vc_INTRINSIC __m256i avx_cast(__m128i v) { return _mm256_castsi128_si256(v); }
template<> Vc_INTRINSIC __m256i avx_cast(__m128d v) { return _mm256_castsi128_si256(_mm_castpd_si128(v)); }
template<> Vc_INTRINSIC __m256d avx_cast(__m128  v) { return _mm256_castpd128_pd256(_mm_castps_pd(v)); }
template<> Vc_INTRINSIC __m256d avx_cast(__m128i v) { return _mm256_castpd128_pd256(_mm_castsi128_pd(v)); }
template<> Vc_INTRINSIC __m256d avx_cast(__m128d v) { return _mm256_castpd128_pd256(v); }
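// zeroExtend: guarantee zeroed upper 128 bits. Where the plain cast leaves them
// undefined (see the FIXME above), _mm256_permute2f128 with selector 0x80 copies
// the low lane into the low lane of the result and explicitly zeroes the upper lane.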
#if defined Vc_MSVC || defined Vc_CLANG || defined Vc_APPLECLANG
static Vc_INTRINSIC Vc_CONST __m256  zeroExtend(__m128  v) { return _mm256_permute2f128_ps   (_mm256_castps128_ps256(v), _mm256_castps128_ps256(v), 0x80); }
static Vc_INTRINSIC Vc_CONST __m256i zeroExtend(__m128i v) { return _mm256_permute2f128_si256(_mm256_castsi128_si256(v), _mm256_castsi128_si256(v), 0x80); }
static Vc_INTRINSIC Vc_CONST __m256d zeroExtend(__m128d v) { return _mm256_permute2f128_pd   (_mm256_castpd128_pd256(v), _mm256_castpd128_pd256(v), 0x80); }
#else
static Vc_INTRINSIC Vc_CONST __m256  zeroExtend(__m128  v) { return _mm256_castps128_ps256(v); }
static Vc_INTRINSIC Vc_CONST __m256i zeroExtend(__m128i v) { return _mm256_castsi128_si256(v); }
static Vc_INTRINSIC Vc_CONST __m256d zeroExtend(__m128d v) { return _mm256_castpd128_pd256(v); }
#endif

// 256 -> 128
template<> Vc_INTRINSIC __m128  avx_cast(__m256  v) { return _mm256_castps256_ps128(v); }
template<> Vc_INTRINSIC __m128  avx_cast(__m256i v) { return _mm256_castps256_ps128(_mm256_castsi256_ps(v)); }
template<> Vc_INTRINSIC __m128  avx_cast(__m256d v) { return _mm256_castps256_ps128(_mm256_castpd_ps(v)); }
template<> Vc_INTRINSIC __m128i avx_cast(__m256  v) { return _mm256_castsi256_si128(_mm256_castps_si256(v)); }
template<> Vc_INTRINSIC __m128i avx_cast(__m256i v) { return _mm256_castsi256_si128(v); }
template<> Vc_INTRINSIC __m128i avx_cast(__m256d v) { return _mm256_castsi256_si128(_mm256_castpd_si256(v)); }
template<> Vc_INTRINSIC __m128d avx_cast(__m256  v) { return _mm256_castpd256_pd128(_mm256_castps_pd(v)); }
template<> Vc_INTRINSIC __m128d avx_cast(__m256i v) { return _mm256_castpd256_pd128(_mm256_castsi256_pd(v)); }
template<> Vc_INTRINSIC __m128d avx_cast(__m256d v) { return _mm256_castpd256_pd128(v); }

// 256 -> 256
template<> Vc_INTRINSIC __m256  avx_cast(__m256  v) { return v; }
template<> Vc_INTRINSIC __m256  avx_cast(__m256i v) { return _mm256_castsi256_ps(v); }
template<> Vc_INTRINSIC __m256  avx_cast(__m256d v) { return _mm256_castpd_ps(v); }
template<> Vc_INTRINSIC __m256i avx_cast(__m256  v) { return _mm256_castps_si256(v); }
template<> Vc_INTRINSIC __m256i avx_cast(__m256i v) { return v; }
template<> Vc_INTRINSIC __m256i avx_cast(__m256d v) { return _mm256_castpd_si256(v); }
template<> Vc_INTRINSIC __m256d avx_cast(__m256  v) { return _mm256_castps_pd(v); }
template<> Vc_INTRINSIC __m256d avx_cast(__m256i v) { return _mm256_castsi256_pd(v); }
template<> Vc_INTRINSIC __m256d avx_cast(__m256d v) { return v; }

// simplify splitting 256-bit registers in 128-bit registers
Vc_INTRINSIC Vc_CONST __m128  lo128(__m256  v) { return avx_cast<__m128>(v); }
Vc_INTRINSIC Vc_CONST __m128d lo128(__m256d v) { return avx_cast<__m128d>(v); }
Vc_INTRINSIC Vc_CONST __m128i lo128(__m256i v) { return avx_cast<__m128i>(v); }
Vc_INTRINSIC Vc_CONST __m128  hi128(__m256  v) { return extract128<1>(v); }
Vc_INTRINSIC Vc_CONST __m128d hi128(__m256d v) { return extract128<1>(v); }
Vc_INTRINSIC Vc_CONST __m128i hi128(__m256i v) { return extract128<1>(v); }

// simplify combining 128-bit registers in 256-bit registers
Vc_INTRINSIC Vc_CONST __m256  concat(__m128  a, __m128  b) { return insert128<1>(avx_cast<__m256 >(a), b); }
Vc_INTRINSIC Vc_CONST __m256d concat(__m128d a, __m128d b) { return insert128<1>(avx_cast<__m256d>(a), b); }
Vc_INTRINSIC Vc_CONST __m256i concat(__m128i a, __m128i b) { return insert128<1>(avx_cast<__m256i>(a), b); }

}  // namespace Casts
using namespace Casts;
}  // namespace AVX

namespace AVX2
{
using namespace AVX::Casts;
}  // namespace AVX2

namespace AVX
{
template <typename From, typename To> struct ConvertTag {};

Vc_INTRINSIC __m256i convert(__m256  v, ConvertTag<float , int>) { return _mm256_cvttps_epi32(v); }
Vc_INTRINSIC __m128i convert(__m256d v, ConvertTag<double, int>) { return _mm256_cvttpd_epi32(v); }
Vc_INTRINSIC __m256i convert(__m256i v, ConvertTag<int   , int>) { return v; }
Vc_INTRINSIC __m256i convert(__m256i v, ConvertTag<uint  , int>) { return v; }
Vc_INTRINSIC __m256i convert(__m128i v, ConvertTag<short , int>) {
#ifdef Vc_IMPL_AVX2
    return _mm256_cvtepi16_epi32(v);
#else
    return AVX::srai_epi32<16>(
        concat(_mm_unpacklo_epi16(v, v), _mm_unpackhi_epi16(v, v)));
#endif
}
Vc_INTRINSIC __m256i convert(__m128i v, ConvertTag<ushort, int>) {
#ifdef Vc_IMPL_AVX2
    return _mm256_cvtepu16_epi32(v);
#else
    return AVX::srli_epi32<16>(
        concat(_mm_unpacklo_epi16(v, v), _mm_unpackhi_epi16(v, v)));
#endif
}

Vc_INTRINSIC __m256i convert(__m256  v, ConvertTag<float , uint>) {
    using namespace AVX;
    return _mm256_castps_si256(_mm256_blendv_ps(
        _mm256_castsi256_ps(_mm256_cvttps_epi32(v)),
        _mm256_castsi256_ps(add_epi32(_mm256_cvttps_epi32(_mm256_sub_ps(v, set2power31_ps())),
                                      set2power31_epu32())),
        cmpge_ps(v, set2power31_ps())));
}
Vc_INTRINSIC __m128i convert(__m256d v, ConvertTag<double, uint>) {
    using namespace AVX;
    return _mm_xor_si128(
        _mm256_cvttpd_epi32(_mm256_sub_pd(_mm256_floor_pd(v), set1_pd(0x80000000u))),
        _mm_set2power31_epu32());
}
Vc_INTRINSIC __m256i convert(__m256i v, ConvertTag<int   , uint>) { return v; }
Vc_INTRINSIC __m256i convert(__m256i v, ConvertTag<uint  , uint>) { return v; }
Vc_INTRINSIC __m256i convert(__m128i v, ConvertTag<short , uint>) {
#ifdef Vc_IMPL_AVX2
    return _mm256_cvtepi16_epi32(v);
#else
    return AVX::srai_epi32<16>(
        concat(_mm_unpacklo_epi16(v, v), _mm_unpackhi_epi16(v, v)));
#endif
}
Vc_INTRINSIC __m256i convert(__m128i v, ConvertTag<ushort, uint>) {
#ifdef Vc_IMPL_AVX2
    return _mm256_cvtepu16_epi32(v);
#else
    return AVX::srli_epi32<16>(
        concat(_mm_unpacklo_epi16(v, v), _mm_unpackhi_epi16(v, v)));
#endif
}

Vc_INTRINSIC __m256  convert(__m256  v, ConvertTag<float , float>) { return v; }
Vc_INTRINSIC __m128  convert(__m256d v, ConvertTag<double, float>) { return _mm256_cvtpd_ps(v); }
Vc_INTRINSIC __m256  convert(__m256i v, ConvertTag<int   , float>) { return _mm256_cvtepi32_ps(v); }
Vc_INTRINSIC __m256  convert(__m256i v, ConvertTag<uint  , float>) {
    // this is complicated because cvtepi32_ps only supports signed input. Thus, all
    // input values with the MSB set would produce a negative result. We can reuse the
    // cvtepi32_ps instruction if we unset the MSB. But then the rounding results can be
    // different. Since float uses 24 bits for the mantissa (effectively), the 9-bit LSB
    // determines the rounding direction. (Consider the bits ...8'7654'3210. The bits [0:7]
    // need to be dropped and if > 0x80 round up, if < 0x80 round down. If [0:7] == 0x80
    // then the rounding direction is determined by bit [8] for round to even. That's why
    // the 9th bit is relevant for the rounding decision.)
    // If the MSB of the input is set to 0, the cvtepi32_ps instruction makes its rounding
    // decision on the lowest 8 bits instead. A second rounding decision is made when
    // float(0x8000'0000) is added. This will rarely fix the rounding issue.
    //
    // Here's what the standard rounding mode expects:
    // 0xc0000080 should cvt to 0xc0000000
    // 0xc0000081 should cvt to 0xc0000100
    // --         should cvt to 0xc0000100
    // 0xc000017f should cvt to 0xc0000100
    // 0xc0000180 should cvt to 0xc0000200
    //
    // However: using float(input ^ 0x8000'0000) + float(0x8000'0000) we get:
    // 0xc0000081 would cvt to 0xc0000000
    // 0xc00000c0 would cvt to 0xc0000000
    // 0xc00000c1 would cvt to 0xc0000100
    // 0xc000013f would cvt to 0xc0000100
    // 0xc0000140 would cvt to 0xc0000200
    //
    // Solution: float(input & 0x7fff'fe00) + (float(0x8000'0000) + float(input & 0x1ff))
    // This ensures the rounding decision is made on the 9-bit LSB when 0x8000'0000 is
    // added to the float value of the low 8 bits of the input.
    using namespace AVX;
    return _mm256_blendv_ps(
        _mm256_cvtepi32_ps(v),
        _mm256_add_ps(_mm256_cvtepi32_ps(and_si256(v, set1_epi32(0x7ffffe00))),
                      _mm256_add_ps(set2power31_ps(), _mm256_cvtepi32_ps(and_si256(
                                                          v, set1_epi32(0x000001ff))))),
        _mm256_castsi256_ps(cmplt_epi32(v, _mm256_setzero_si256())));
}
Vc_INTRINSIC __m256  convert(__m128i v, ConvertTag<short , float>) { return _mm256_cvtepi32_ps(convert(v, ConvertTag< short, int>())); }
Vc_INTRINSIC __m256  convert(__m128i v, ConvertTag<ushort, float>) { return _mm256_cvtepi32_ps(convert(v, ConvertTag<ushort, int>())); }

Vc_INTRINSIC __m256d convert(__m128  v, ConvertTag<float , double>) { return _mm256_cvtps_pd(v); }
Vc_INTRINSIC __m256d convert(__m256d v, ConvertTag<double, double>) { return v; }
Vc_INTRINSIC __m256d convert(__m128i v, ConvertTag<int   , double>) { return _mm256_cvtepi32_pd(v); }
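// unsigned -> double: flip the sign bit (i.e. subtract 0x80000000 in the integer
// domain) so the value fits the signed cvt instruction, then add 2^31 back in the
// double domain to recover the exact unsigned value.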
Vc_INTRINSIC __m256d convert(__m128i v, ConvertTag<uint  , double>) {
    using namespace AVX;
    return _mm256_add_pd(
        _mm256_cvtepi32_pd(_mm_xor_si128(v, _mm_setmin_epi32())),
        set1_pd(1u << 31));
}
Vc_INTRINSIC __m256d convert(__m128i v, ConvertTag<short , double>) { return convert(convert(v, SSE::ConvertTag< short, int>()), ConvertTag<int, double>()); }
Vc_INTRINSIC __m256d convert(__m128i v, ConvertTag<ushort, double>) { return convert(convert(v, SSE::ConvertTag<ushort, int>()), ConvertTag<int, double>()); }

Vc_INTRINSIC __m128i convert(__m256i v, ConvertTag<int   , short>) {
#ifdef Vc_IMPL_AVX2
    auto a = _mm256_shuffle_epi8(
        v, _mm256_setr_epi8(0, 1, 4, 5, 8, 9, 12, 13, -0x80, -0x80, -0x80, -0x80, -0x80,
                            -0x80, -0x80, -0x80, 0, 1, 4, 5, 8, 9, 12, 13, -0x80, -0x80,
                            -0x80, -0x80, -0x80, -0x80, -0x80, -0x80));
    return lo128(_mm256_permute4x64_epi64(a, 0xf8)); // a[0] a[2] | a[3] a[3]
#else
    const auto tmp0 = _mm_unpacklo_epi16(lo128(v), hi128(v));
    const auto tmp1 = _mm_unpackhi_epi16(lo128(v), hi128(v));
    const auto tmp2 = _mm_unpacklo_epi16(tmp0, tmp1);
    const auto tmp3 = _mm_unpackhi_epi16(tmp0, tmp1);
    return _mm_unpacklo_epi16(tmp2, tmp3);
#endif
}
Vc_INTRINSIC __m128i convert(__m256i v, ConvertTag<uint  , short>) { return convert(v, ConvertTag<int, short>()); }
Vc_INTRINSIC __m128i convert(__m256  v, ConvertTag<float , short>) { return convert(convert(v, ConvertTag<float, int>()), ConvertTag<int, short>()); }
Vc_INTRINSIC __m128i convert(__m256d v, ConvertTag<double, short>) { return convert(convert(v, ConvertTag<double, int>()), SSE::ConvertTag<int, short>()); }
Vc_INTRINSIC __m256i convert(__m256i v, ConvertTag<short , short>) { return v; }
Vc_INTRINSIC __m256i convert(__m256i v, ConvertTag<ushort, short>) { return v; }

Vc_INTRINSIC __m128i convert(__m256i v, ConvertTag<int   , ushort>) {
    auto tmp0 = _mm_unpacklo_epi16(lo128(v), hi128(v));
    auto tmp1 = _mm_unpackhi_epi16(lo128(v), hi128(v));
    auto tmp2 = _mm_unpacklo_epi16(tmp0, tmp1);
    auto tmp3 = _mm_unpackhi_epi16(tmp0, tmp1);
    return _mm_unpacklo_epi16(tmp2, tmp3);
}
Vc_INTRINSIC __m128i convert(__m256i v, ConvertTag<uint  , ushort>) {
    auto tmp0 = _mm_unpacklo_epi16(lo128(v), hi128(v));
    auto tmp1 = _mm_unpackhi_epi16(lo128(v), hi128(v));
    auto tmp2 = _mm_unpacklo_epi16(tmp0, tmp1);
    auto tmp3 = _mm_unpackhi_epi16(tmp0, tmp1);
    return _mm_unpacklo_epi16(tmp2, tmp3);
}
Vc_INTRINSIC __m128i convert(__m256  v, ConvertTag<float , ushort>) { return convert(convert(v, ConvertTag<float, uint>()), ConvertTag<uint, ushort>()); }
Vc_INTRINSIC __m128i convert(__m256d v, ConvertTag<double, ushort>) { return convert(convert(v, ConvertTag<double, uint>()), SSE::ConvertTag<uint, ushort>()); }
Vc_INTRINSIC __m256i convert(__m256i v, ConvertTag<short , ushort>) { return v; }
Vc_INTRINSIC __m256i convert(__m256i v, ConvertTag<ushort, ushort>) { return v; }

template <typename From, typename To>
Vc_INTRINSIC auto convert(
    typename std::conditional<(sizeof(From) < sizeof(To)),
                              typename SSE::VectorTraits<From>::VectorType,
                              typename AVX::VectorTypeHelper<From>::Type>::type v)
    -> decltype(convert(v, ConvertTag<From, To>()))
{
    return convert(v, ConvertTag<From, To>());
}

template <typename From, typename To, typename = enable_if<(sizeof(From) < sizeof(To))>>
Vc_INTRINSIC auto convert(typename AVX::VectorTypeHelper<From>::Type v)
    -> decltype(convert(lo128(v), ConvertTag<From, To>()))
{
    return convert(lo128(v), ConvertTag<From, To>());
}
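
// Usage sketch (illustration only): the tag-dispatched overloads above let
// callers write, e.g.,
//   __m256i w = convert<short, int>(v128);  // widening: takes the SSE vector type
//   __m128i n = convert<int, short>(v256);  // narrowing: takes the AVX vector type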
} // namespace AVX
} // namespace Vc

#endif // VC_AVX_CASTS_H_
@ -0,0 +1,155 @@
|
|||
/* This file is part of the Vc library. {{{
|
||||
Copyright © 2009-2015 Matthias Kretz <kretz@kde.org>
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the names of contributing organizations nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
}}}*/
|
||||
|
||||
#ifndef VC_AVX_CONST_H_
|
||||
#define VC_AVX_CONST_H_
|
||||
|
||||
#include <cstddef>
|
||||
#include "types.h"
|
||||
#include "const_data.h"
|
||||
#include "macros.h"
|
||||
|
||||
namespace Vc_VERSIONED_NAMESPACE
|
||||
{
|
||||
namespace AVX
|
||||
{
|
||||
template<typename T> struct IndexesFromZeroData;
|
||||
template<> struct IndexesFromZeroData<int> {
|
||||
static Vc_ALWAYS_INLINE Vc_CONST const int *address() { return reinterpret_cast<const int *>(&_IndexesFromZero32[0]); }
|
||||
};
|
||||
template<> struct IndexesFromZeroData<unsigned int> {
|
||||
static Vc_ALWAYS_INLINE Vc_CONST const unsigned int *address() { return &_IndexesFromZero32[0]; }
|
||||
};
|
||||
template<> struct IndexesFromZeroData<short> {
|
||||
static Vc_ALWAYS_INLINE Vc_CONST const short *address() { return reinterpret_cast<const short *>(&_IndexesFromZero16[0]); }
|
||||
};
|
||||
template<> struct IndexesFromZeroData<unsigned short> {
|
||||
static Vc_ALWAYS_INLINE Vc_CONST const unsigned short *address() { return &_IndexesFromZero16[0]; }
|
||||
};
|
||||
template<> struct IndexesFromZeroData<signed char> {
|
||||
static Vc_ALWAYS_INLINE Vc_CONST const signed char *address() { return reinterpret_cast<const signed char *>(&_IndexesFromZero8[0]); }
|
||||
};
|
||||
template<> struct IndexesFromZeroData<char> {
|
||||
static Vc_ALWAYS_INLINE Vc_CONST const char *address() { return reinterpret_cast<const char *>(&_IndexesFromZero8[0]); }
|
||||
};
|
||||
template<> struct IndexesFromZeroData<unsigned char> {
|
||||
static Vc_ALWAYS_INLINE Vc_CONST const unsigned char *address() { return &_IndexesFromZero8[0]; }
|
||||
};
|
||||
|
||||
template<typename _T> struct Const
|
||||
{
|
||||
typedef Vector<_T> V;
|
||||
typedef typename V::EntryType T;
|
||||
typedef typename V::Mask M;
|
||||
|
||||
static Vc_ALWAYS_INLINE Vc_CONST V _pi_4() { return V(c_trig<T>::data[0]); }
|
||||
    static Vc_ALWAYS_INLINE Vc_CONST V _pi_4_hi() { return V(c_trig<T>::data[1]); }
    static Vc_ALWAYS_INLINE Vc_CONST V _pi_4_rem1() { return V(c_trig<T>::data[2]); }
    static Vc_ALWAYS_INLINE Vc_CONST V _pi_4_rem2() { return V(c_trig<T>::data[3]); }
    static Vc_ALWAYS_INLINE Vc_CONST V _1_16() { return V(c_trig<T>::data[4]); }
    static Vc_ALWAYS_INLINE Vc_CONST V _16() { return V(c_trig<T>::data[5]); }

    static Vc_ALWAYS_INLINE Vc_CONST V atanP(int i) { return V(c_trig<T>::data[(12 + i)]); }
    static Vc_ALWAYS_INLINE Vc_CONST V atanQ(int i) { return V(c_trig<T>::data[(17 + i)]); }
    static Vc_ALWAYS_INLINE Vc_CONST V atanThrsHi() { return V(c_trig<T>::data[22]); }
    static Vc_ALWAYS_INLINE Vc_CONST V atanThrsLo() { return V(c_trig<T>::data[23]); }
    static Vc_ALWAYS_INLINE Vc_CONST V _pi_2_rem() { return V(c_trig<T>::data[24]); }
    static Vc_ALWAYS_INLINE Vc_CONST V lossThreshold() { return V(c_trig<T>::data[8]); }
    static Vc_ALWAYS_INLINE Vc_CONST V _4_pi() { return V(c_trig<T>::data[9]); }
    static Vc_ALWAYS_INLINE Vc_CONST V _pi_2() { return V(c_trig<T>::data[10]); }
    static Vc_ALWAYS_INLINE Vc_CONST V _pi() { return V(c_trig<T>::data[11]); }
    static Vc_ALWAYS_INLINE Vc_CONST V asinCoeff0(int i) { return V(c_trig<T>::data[(28 + i)]); }
    static Vc_ALWAYS_INLINE Vc_CONST V asinCoeff1(int i) { return V(c_trig<T>::data[(33 + i)]); }
    static Vc_ALWAYS_INLINE Vc_CONST V asinCoeff2(int i) { return V(c_trig<T>::data[(37 + i)]); }
    static Vc_ALWAYS_INLINE Vc_CONST V asinCoeff3(int i) { return V(c_trig<T>::data[(43 + i)]); }
    static Vc_ALWAYS_INLINE Vc_CONST V smallAsinInput() { return V(c_trig<T>::data[25]); }
    static Vc_ALWAYS_INLINE Vc_CONST V largeAsinInput() { return V(c_trig<T>::data[26]); }

    static Vc_ALWAYS_INLINE Vc_CONST M exponentMask() { return M(V(c_log<T>::d(1)).data()); }
    static Vc_ALWAYS_INLINE Vc_CONST V _1_2() { return V(c_log<T>::d(18)); }
    static Vc_ALWAYS_INLINE Vc_CONST V _1_sqrt2() { return V(c_log<T>::d(15)); }
    static Vc_ALWAYS_INLINE Vc_CONST V P(int i) { return V(c_log<T>::d(2 + i)); }
    static Vc_ALWAYS_INLINE Vc_CONST V Q(int i) { return V(c_log<T>::d(8 + i)); }
    static Vc_ALWAYS_INLINE Vc_CONST V min() { return V(c_log<T>::d(14)); }
    static Vc_ALWAYS_INLINE Vc_CONST V ln2_small() { return V(c_log<T>::d(17)); }
    static Vc_ALWAYS_INLINE Vc_CONST V ln2_large() { return V(c_log<T>::d(16)); }
    static Vc_ALWAYS_INLINE Vc_CONST V neginf() { return V(c_log<T>::d(13)); }
    static Vc_ALWAYS_INLINE Vc_CONST V log10_e() { return V(c_log<T>::d(19)); }
    static Vc_ALWAYS_INLINE Vc_CONST V log2_e() { return V(c_log<T>::d(20)); }

    static Vc_ALWAYS_INLINE_L Vc_CONST_L V highMask() Vc_ALWAYS_INLINE_R Vc_CONST_R;
    static Vc_ALWAYS_INLINE_L Vc_CONST_L V highMask(int bits) Vc_ALWAYS_INLINE_R Vc_CONST_R;
};

template <> Vc_ALWAYS_INLINE Vc_CONST Vector<float> Const<float>::highMask()
{
    return _mm256_broadcast_ss(
        reinterpret_cast<const float *>(&c_general::highMaskFloat));
}
template <> Vc_ALWAYS_INLINE Vc_CONST Vector<double> Const<double>::highMask()
{
    return _mm256_broadcast_sd(
        reinterpret_cast<const double *>(&c_general::highMaskDouble));
}
template <> Vc_ALWAYS_INLINE Vc_CONST Vector<float> Const<float>::highMask(int bits)
{
#ifdef Vc_IMPL_AVX2
#if defined Vc_ICC || defined Vc_MSVC
    __m256i allone = _mm256_set1_epi64x(~0);
#else
    auto allone = ~__m256i();
#endif
    return _mm256_castsi256_ps(_mm256_slli_epi32(allone, bits));
#else
    __m128 tmp = _mm_castsi128_ps(_mm_slli_epi32(_mm_setallone_si128(), bits));
    return concat(tmp, tmp);
#endif
}
template <> Vc_ALWAYS_INLINE Vc_CONST Vector<double> Const<double>::highMask(int bits)
{
#ifdef Vc_IMPL_AVX2
#if defined Vc_ICC || defined Vc_MSVC
    __m256i allone = _mm256_set1_epi64x(~0);
#else
    auto allone = ~__m256i();
#endif
    return _mm256_castsi256_pd(_mm256_slli_epi64(allone, bits));
#else
    __m128d tmp = _mm_castsi128_pd(_mm_slli_epi64(_mm_setallone_si128(), bits));
    return concat(tmp, tmp);
#endif
}
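
// Editor's note (not in the original source): a scalar sketch of what a
// high-bits mask like the one built above is typically used for -- zeroing
// the low mantissa bits of a double so it splits exactly into a high part
// plus remainder (Dekker-style splitting). The union punning mirrors the
// style used elsewhere in this codebase; the name is illustrative only.
static inline double scalar_high_part(double x, int bits)
{
    union { double d; unsigned long long u; } v = { x };
    v.u &= ~0ull << bits;   // per-lane equivalent of highMask(bits) & x
    return v.d;             // x - scalar_high_part(x, bits) is exact
}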
} // namespace AVX

namespace AVX2
{
using AVX::IndexesFromZeroData;
using AVX::Const;
} // namespace AVX2
} // namespace Vc

#endif // VC_AVX_CONST_H_

@@ -0,0 +1,100 @@
/* This file is part of the Vc library. {{{
Copyright © 2012-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_AVX_CONST_DATA_H_
#define VC_AVX_CONST_DATA_H_

#include "../common/data.h"
#include "macros.h"

namespace Vc_VERSIONED_NAMESPACE
{
namespace AVX
{

alignas(64) extern const unsigned int   _IndexesFromZero32[ 8];
alignas(16) extern const unsigned short _IndexesFromZero16[16];
alignas(16) extern const unsigned char  _IndexesFromZero8 [32];

struct alignas(64) c_general
{
    static const float oneFloat;
    static const unsigned int absMaskFloat[2];
    static const unsigned int signMaskFloat[2];
    static const unsigned int highMaskFloat;
    static const unsigned short minShort[2];
    static const unsigned short one16[2];
    static const float _2power31;
    static const double oneDouble;
    static const unsigned long long frexpMask;
    static const unsigned long long highMaskDouble;
};

template<typename T> struct c_trig
{
    alignas(64) static const T data[];
};
#ifndef Vc_MSVC
template <> alignas(64) const float c_trig<float>::data[];
template <> alignas(64) const double c_trig<double>::data[];
#endif

template<typename T> struct c_log
{
    typedef float floatAlias Vc_MAY_ALIAS;
    static Vc_ALWAYS_INLINE float d(int i) { return *reinterpret_cast<const floatAlias *>(&data[i]); }
    alignas(64) static const unsigned int data[21];
};
#ifndef Vc_MSVC
template<> alignas(64) const unsigned int c_log<float>::data[21];
#endif

template<> struct c_log<double>
{
    enum VectorSize { Size = 16 / sizeof(double) };
    typedef double doubleAlias Vc_MAY_ALIAS;
    static Vc_ALWAYS_INLINE double d(int i) { return *reinterpret_cast<const doubleAlias *>(&data[i]); }
    alignas(64) static const unsigned long long data[21];
};

} // namespace AVX
} // namespace Vc

namespace Vc_VERSIONED_NAMESPACE
{
namespace AVX2
{
using AVX::_IndexesFromZero8;
using AVX::_IndexesFromZero16;
using AVX::_IndexesFromZero32;
using AVX::c_general;
using AVX::c_trig;
using AVX::c_log;
} // namespace AVX2
} // namespace Vc

#endif // VC_AVX_CONST_DATA_H_

@@ -0,0 +1,124 @@
/* This file is part of the Vc library. {{{
Copyright © 2011-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_AVX_DEBUG_H_
#define VC_AVX_DEBUG_H_

#ifndef NDEBUG
#include "vector.h"
#include <iostream>
#include <iomanip>
#endif

namespace Vc_VERSIONED_NAMESPACE
{
namespace AVX
{
template <typename T, typename U> struct AddType {
    const U &d;
};
template <typename T, typename U> AddType<T, U> addType(const U &x) { return {x}; }

#ifdef NDEBUG
class DebugStream
{
public:
    DebugStream(const char *, const char *, int) {}
    template<typename T> inline DebugStream &operator<<(const T &) { return *this; }
};
#else
class DebugStream
{
private:
    template<typename T, typename V> static void printVector(V _x)
    {
        enum { Size = sizeof(V) / sizeof(T) };
        union { V v; T m[Size]; } x = { _x };
        std::cerr << '[' << std::setprecision(24) << x.m[0];
        for (int i = 1; i < Size; ++i) {
            std::cerr << ", " << std::setprecision(24) << x.m[i];
        }
        std::cerr << ']';
    }
public:
    DebugStream(const char *func, const char *file, int line)
    {
        std::cerr << "\033[1;40;33mDEBUG: " << file << ':' << line << ' ' << func << ' ';
    }

    template<typename T> DebugStream &operator<<(const T &x) { std::cerr << x; return *this; }

    template <typename T, typename U> DebugStream &operator<<(AddType<T, U> &&x)
    {
        printVector<T, U>(x.d);
        return *this;
    }
    DebugStream &operator<<(__m128 x) {
        printVector<float, __m128>(x);
        return *this;
    }
    DebugStream &operator<<(__m256 x) {
        printVector<float, __m256>(x);
        return *this;
    }
    DebugStream &operator<<(__m128d x) {
        printVector<double, __m128d>(x);
        return *this;
    }
    DebugStream &operator<<(__m256d x) {
        printVector<double, __m256d>(x);
        return *this;
    }
    DebugStream &operator<<(__m128i x) {
        printVector<unsigned int, __m128i>(x);
        return *this;
    }
    DebugStream &operator<<(__m256i x) {
        printVector<unsigned int, __m256i>(x);
        return *this;
    }

    ~DebugStream()
    {
        std::cerr << "\033[0m" << std::endl;
    }
};
#endif

#ifdef Vc_DEBUG
#undef Vc_DEBUG
#endif
#ifdef Vc_MSVC
#define Vc_DEBUG Vc::AVX::DebugStream(__FUNCSIG__, __FILE__, __LINE__)
#else
#define Vc_DEBUG Vc::AVX::DebugStream(__PRETTY_FUNCTION__, __FILE__, __LINE__)
#endif
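
#ifndef NDEBUG
// Editor's note (not in the original source): a minimal usage sketch for the
// Vc_DEBUG macro defined above. Each statement creates a temporary
// DebugStream tagged with function/file/line; its destructor resets the
// terminal color and ends the line. In NDEBUG builds the stub class makes
// the same statements compile to nothing.
inline void debugStreamExample(__m256 v)
{
    Vc_DEBUG << "eight float lanes: " << v;  // picks operator<<(__m256)
    Vc_DEBUG << "same data, explicit element type: " << addType<float>(v);
}
#endif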

} // namespace AVX
} // namespace Vc

#endif // VC_AVX_DEBUG_H_

@@ -0,0 +1,290 @@
/* This file is part of the Vc library. {{{
Copyright © 2010-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

namespace Vc_VERSIONED_NAMESPACE
{
namespace AVX2
{

inline void deinterleave(double_v &Vc_RESTRICT a, double_v &Vc_RESTRICT b, double_v &Vc_RESTRICT c)
{ // estimated latency (AVX): 4.5 cycles
    const m256d tmp0 = Mem::shuffle128<X0, Y1>(a.data(), b.data());
    const m256d tmp1 = Mem::shuffle128<X1, Y0>(a.data(), c.data());
    const m256d tmp2 = Mem::shuffle128<X0, Y1>(b.data(), c.data());
    a.data() = Mem::shuffle<X0, Y1, X2, Y3>(tmp0, tmp1);
    b.data() = Mem::shuffle<X1, Y0, X3, Y2>(tmp0, tmp2);
    c.data() = Mem::shuffle<X0, Y1, X2, Y3>(tmp1, tmp2);
}

inline void deinterleave(float_v &Vc_RESTRICT a, float_v &Vc_RESTRICT b, float_v &Vc_RESTRICT c)
{
    // abc abc abc
    // a = [a0 b0 c0 a1 b1 c1 a2 b2] 332 = 211+121
    // b = [c2 a3 b3 c3 a4 b4 c4 a5] 323 = 112+211
    // c = [b5 c5 a6 b6 c6 a7 b7 c7] 233 = 121+112
    const m256 ac0 = Mem::shuffle128<X0, Y0>(a.data(), c.data()); // a0 b0 c0 a1 b5 c5 a6 b6
    const m256 ac1 = Mem::shuffle128<X1, Y1>(a.data(), c.data()); // b1 c1 a2 b2 c6 a7 b7 c7

    m256 tmp0 = Mem::blend<X0, Y1, X2, X3, Y4, X5, X6, Y7>( ac0, b.data());
         tmp0 = Mem::blend<X0, X1, Y2, X3, X4, Y5, X6, X7>(tmp0, ac1); // a0 a3 a2 a1 a4 a7 a6 a5
    m256 tmp1 = Mem::blend<X0, X1, Y2, X3, X4, Y5, X6, X7>( ac0, b.data());
         tmp1 = Mem::blend<Y0, X1, X2, Y3, X4, X5, Y6, X7>(tmp1, ac1); // b1 b0 b3 b2 b5 b4 b7 b6
    m256 tmp2 = Mem::blend<Y0, X1, X2, Y3, X4, X5, Y6, X7>( ac0, b.data());
         tmp2 = Mem::blend<X0, Y1, X2, X3, Y4, X5, X6, Y7>(tmp2, ac1); // c2 c1 c0 c3 c6 c5 c4 c7

    a.data() = Mem::permute<X0, X3, X2, X1>(tmp0);
    b.data() = Mem::permute<X1, X0, X3, X2>(tmp1);
    c.data() = Mem::permute<X2, X1, X0, X3>(tmp2);
}
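
// Editor's note (not in the original source): the scalar operation that the
// shuffle/blend/permute sequence above implements, for reference. Three
// interleaved x,y,z values per point become one contiguous stream per
// output; the SIMD version performs this permutation entirely in registers.
inline void deinterleave3_reference(const float *in, float *x, float *y,
                                    float *z, int n)
{
    for (int i = 0; i < n; ++i) {
        x[i] = in[3 * i + 0];
        y[i] = in[3 * i + 1];
        z[i] = in[3 * i + 2];
    }
}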

inline void deinterleave(int_v &Vc_RESTRICT a, int_v &Vc_RESTRICT b, int_v &Vc_RESTRICT c)
{
    deinterleave(reinterpret_cast<float_v &>(a), reinterpret_cast<float_v &>(b),
                 reinterpret_cast<float_v &>(c));
}

inline void deinterleave(uint_v &Vc_RESTRICT a, uint_v &Vc_RESTRICT b, uint_v &Vc_RESTRICT c)
{
    deinterleave(reinterpret_cast<float_v &>(a), reinterpret_cast<float_v &>(b),
                 reinterpret_cast<float_v &>(c));
}

inline void deinterleave(Vector<short> &Vc_RESTRICT, Vector<short> &Vc_RESTRICT,
                         Vector<short> &Vc_RESTRICT)
{
    return;
    /* TODO:
    // abc abc abc
    // a = [a0 b0 c0 a1 b1 c1 a2 b2] 332 = 211+121
    // b = [c2 a3 b3 c3 a4 b4 c4 a5] 323 = 112+211
    // c = [b5 c5 a6 b6 c6 a7 b7 c7] 233 = 121+112
    m128i ac0 = _mm_unpacklo_epi64(a.data(), c.data()); // a0 b0 c0 a1 b5 c5 a6 b6
    m128i ac1 = _mm_unpackhi_epi64(a.data(), c.data()); // b1 c1 a2 b2 c6 a7 b7 c7

    m128i tmp0 = Mem::blend<X0, Y1, X2, X3, Y4, X5, X6, Y7>( ac0, b.data());
          tmp0 = Mem::blend<X0, X1, Y2, X3, X4, Y5, X6, X7>(tmp0, ac1); // a0 a3 a2 a1 a4 a7 a6 a5
    m128i tmp1 = Mem::blend<X0, X1, Y2, X3, X4, Y5, X6, X7>( ac0, b.data());
          tmp1 = Mem::blend<Y0, X1, X2, Y3, X4, X5, Y6, X7>(tmp1, ac1); // b1 b0 b3 b2 b5 b4 b7 b6
    m128i tmp2 = Mem::blend<Y0, X1, X2, Y3, X4, X5, Y6, X7>( ac0, b.data());
          tmp2 = Mem::blend<X0, Y1, X2, X3, Y4, X5, X6, Y7>(tmp2, ac1); // c2 c1 c0 c3 c6 c5 c4 c7

    a.data() = Mem::permuteHi<X4, X7, X6, X5>(Mem::permuteLo<X0, X3, X2, X1>(tmp0));
    b.data() = Mem::permuteHi<X5, X4, X7, X6>(Mem::permuteLo<X1, X0, X3, X2>(tmp1));
    c.data() = Mem::permuteHi<X6, X5, X4, X7>(Mem::permuteLo<X2, X1, X0, X3>(tmp2));
    */
}

inline void deinterleave(Vector<unsigned short> &Vc_RESTRICT a, Vector<unsigned short> &Vc_RESTRICT b,
                         Vector<unsigned short> &Vc_RESTRICT c)
{
    deinterleave(reinterpret_cast<Vector<short> &>(a), reinterpret_cast<Vector<short> &>(b),
                 reinterpret_cast<Vector<short> &>(c));
}

inline void deinterleave(Vector<float> &a, Vector<float> &b)
{
    // a7 a6 a5 a4 a3 a2 a1 a0
    // b7 b6 b5 b4 b3 b2 b1 b0
    const m256 tmp0 = Reg::permute128<Y0, X0>(a.data(), b.data()); // b3 b2 b1 b0 a3 a2 a1 a0
    const m256 tmp1 = Reg::permute128<Y1, X1>(a.data(), b.data()); // b7 b6 b5 b4 a7 a6 a5 a4

    const m256 tmp2 = _mm256_unpacklo_ps(tmp0, tmp1); // b5 b1 b4 b0 a5 a1 a4 a0
    const m256 tmp3 = _mm256_unpackhi_ps(tmp0, tmp1); // b7 b3 b6 b2 a7 a3 a6 a2

    a.data() = _mm256_unpacklo_ps(tmp2, tmp3); // b6 b4 b2 b0 a6 a4 a2 a0
    b.data() = _mm256_unpackhi_ps(tmp2, tmp3); // b7 b5 b3 b1 a7 a5 a3 a1
}
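
// Editor's note (not in the original source): why the permute128 step above
// is needed before the unpacks. _mm256_unpacklo_ps interleaves within each
// 128-bit lane independently, so the inputs must first be arranged with
// matching elements in the same lane.
inline void unpackloLaneDemo()
{
    const m256 x  = _mm256_setr_ps(0, 1, 2, 3, 4, 5, 6, 7);
    const m256 y  = _mm256_setr_ps(10, 11, 12, 13, 14, 15, 16, 17);
    const m256 lo = _mm256_unpacklo_ps(x, y);
    // lo now holds 0 10 1 11 | 4 14 5 15: the interleave restarts at the
    // lane boundary instead of running across all eight elements.
    (void)lo;
}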

inline void deinterleave(Vector<short> &a, // a0 b0 a1 b1 a2 b2 a3 b3 | a4 b4 a5 ...
                         Vector<short> &b) // a8 b8 a9 ...
{
    auto v0 = Mem::shuffle128<X0, Y0>(a.data(), b.data());
    auto v1 = Mem::shuffle128<X1, Y1>(a.data(), b.data());
    auto v2 = AVX::unpacklo_epi16(v0, v1); // a0 a4 ...
    auto v3 = AVX::unpackhi_epi16(v0, v1); // a2 a6 ...
    v0 = AVX::unpacklo_epi16(v2, v3); // a0 a2 ...
    v1 = AVX::unpackhi_epi16(v2, v3); // a1 a3 ...
    a.data() = AVX::unpacklo_epi16(v0, v1); // a0 a1 ...
    b.data() = AVX::unpackhi_epi16(v0, v1); // b0 b1 ...
}

inline void deinterleave(Vector<ushort> &a, Vector<ushort> &b)
{
    auto v0 = Mem::shuffle128<X0, Y0>(a.data(), b.data());
    auto v1 = Mem::shuffle128<X1, Y1>(a.data(), b.data());
    auto v2 = AVX::unpacklo_epi16(v0, v1); // a0 a4 ...
    auto v3 = AVX::unpackhi_epi16(v0, v1); // a2 a6 ...
    v0 = AVX::unpacklo_epi16(v2, v3); // a0 a2 ...
    v1 = AVX::unpackhi_epi16(v2, v3); // a1 a3 ...
    a.data() = AVX::unpacklo_epi16(v0, v1); // a0 a1 ...
    b.data() = AVX::unpackhi_epi16(v0, v1); // b0 b1 ...
}

} // namespace AVX2
namespace Detail
{
template <typename Flags>
inline void deinterleave(AVX2::float_v &a, AVX2::float_v &b, const float *m, Flags align)
{
    a.load(m, align);
    b.load(m + AVX2::float_v::Size, align);
    Vc::AVX2::deinterleave(a, b);
}

template <typename Flags>
inline void deinterleave(AVX2::float_v &a, AVX2::float_v &b, const short *m, Flags f)
{
    using namespace Vc::AVX2;
    const auto tmp = Detail::load32(m, f);
    a.data() =
        _mm256_cvtepi32_ps(concat(_mm_srai_epi32(_mm_slli_epi32(lo128(tmp), 16), 16),
                                  _mm_srai_epi32(_mm_slli_epi32(hi128(tmp), 16), 16)));
    b.data() = _mm256_cvtepi32_ps(
        concat(_mm_srai_epi32(lo128(tmp), 16), _mm_srai_epi32(hi128(tmp), 16)));
}
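
// Editor's note (not in the original source): the scalar view of the shift
// pairs used above. With the arithmetic right shift these compilers provide,
// (x << 16) >> 16 sign-extends the low 16 bits of each 32-bit element and
// x >> 16 yields the sign-extended high 16 bits.
static inline int low16SignExtended(int x)
{
    return static_cast<int>(static_cast<unsigned>(x) << 16) >> 16;
}
static inline int high16SignExtended(int x) { return x >> 16; }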

template <typename Flags>
inline void deinterleave(AVX2::float_v &a, AVX2::float_v &b, const unsigned short *m, Flags f)
{
    using namespace Vc::AVX2;
    const auto tmp = Detail::load32(m, f);
    a.data() = _mm256_cvtepi32_ps(
        concat(_mm_blend_epi16(lo128(tmp), _mm_setzero_si128(), 0xaa),
               _mm_blend_epi16(hi128(tmp), _mm_setzero_si128(), 0xaa)));
    b.data() = _mm256_cvtepi32_ps(
        concat(_mm_srli_epi32(lo128(tmp), 16), _mm_srli_epi32(hi128(tmp), 16)));
}

template <typename Flags>
inline void deinterleave(AVX2::double_v &a, AVX2::double_v &b, const double *m, Flags align)
{
    using namespace Vc::AVX2;

    a.load(m, align);
    b.load(m + AVX2::double_v::Size, align);

    m256d tmp0 = Mem::shuffle128<Vc::X0, Vc::Y0>(a.data(), b.data()); // b1 b0 a1 a0
    m256d tmp1 = Mem::shuffle128<Vc::X1, Vc::Y1>(a.data(), b.data()); // b3 b2 a3 a2

    a.data() = _mm256_unpacklo_pd(tmp0, tmp1); // b2 b0 a2 a0
    b.data() = _mm256_unpackhi_pd(tmp0, tmp1); // b3 b1 a3 a1
}

template <typename Flags>
inline void deinterleave(AVX2::int_v &a, AVX2::int_v &b, const int *m, Flags align)
{
    using namespace AVX;
    a.load(m, align);
    b.load(m + AVX2::int_v::Size, align);

    const m256 tmp0 = avx_cast<m256>(Mem::shuffle128<Vc::X0, Vc::Y0>(a.data(), b.data()));
    const m256 tmp1 = avx_cast<m256>(Mem::shuffle128<Vc::X1, Vc::Y1>(a.data(), b.data()));

    const m256 tmp2 = _mm256_unpacklo_ps(tmp0, tmp1); // b5 b1 b4 b0 a5 a1 a4 a0
    const m256 tmp3 = _mm256_unpackhi_ps(tmp0, tmp1); // b7 b3 b6 b2 a7 a3 a6 a2

    a.data() = avx_cast<m256i>(_mm256_unpacklo_ps(tmp2, tmp3)); // b6 b4 b2 b0 a6 a4 a2 a0
    b.data() = avx_cast<m256i>(_mm256_unpackhi_ps(tmp2, tmp3)); // b7 b5 b3 b1 a7 a5 a3 a1
}

template <typename Flags>
inline void deinterleave(AVX2::int_v &a, AVX2::int_v &b, const short *m, Flags f)
{
    using namespace Vc::AVX;
    const AVX2::short_v tmp0(m, f);
    const m256i tmp = tmp0.data();
    a.data() = concat(
        _mm_srai_epi32(_mm_slli_epi32(lo128(tmp), 16), 16),
        _mm_srai_epi32(_mm_slli_epi32(hi128(tmp), 16), 16));
    b.data() = concat(
        _mm_srai_epi32(lo128(tmp), 16),
        _mm_srai_epi32(hi128(tmp), 16));
}

template <typename Flags>
inline void deinterleave(AVX2::uint_v &a, AVX2::uint_v &b, const unsigned int *m, Flags align)
{
    using namespace AVX;
    a.load(m, align);
    b.load(m + AVX2::uint_v::Size, align);

    const m256 tmp0 = avx_cast<m256>(Mem::shuffle128<Vc::X0, Vc::Y0>(a.data(), b.data()));
    const m256 tmp1 = avx_cast<m256>(Mem::shuffle128<Vc::X1, Vc::Y1>(a.data(), b.data()));

    const m256 tmp2 = _mm256_unpacklo_ps(tmp0, tmp1); // b5 b1 b4 b0 a5 a1 a4 a0
    const m256 tmp3 = _mm256_unpackhi_ps(tmp0, tmp1); // b7 b3 b6 b2 a7 a3 a6 a2

    a.data() = avx_cast<m256i>(_mm256_unpacklo_ps(tmp2, tmp3)); // b6 b4 b2 b0 a6 a4 a2 a0
    b.data() = avx_cast<m256i>(_mm256_unpackhi_ps(tmp2, tmp3)); // b7 b5 b3 b1 a7 a5 a3 a1
}

template <typename Flags>
inline void deinterleave(AVX2::uint_v &a, AVX2::uint_v &b, const unsigned short *m, Flags f)
{
    using namespace Vc::AVX;
    const AVX2::ushort_v tmp0(m, f);
    const m256i tmp = tmp0.data();
    a.data() = concat(
        _mm_srai_epi32(_mm_slli_epi32(lo128(tmp), 16), 16),
        _mm_srai_epi32(_mm_slli_epi32(hi128(tmp), 16), 16));
    b.data() = concat(
        _mm_srai_epi32(lo128(tmp), 16),
        _mm_srai_epi32(hi128(tmp), 16));
}

template <typename Flags>
inline void deinterleave(AVX2::short_v &a, AVX2::short_v &b, const short *m, Flags align)
{
    a.load(m, align);
    b.load(m + AVX2::short_v::Size, align);
    Vc::AVX2::deinterleave(a, b);
}

template <typename Flags>
inline void deinterleave(AVX2::ushort_v &a, AVX2::ushort_v &b, const unsigned short *m, Flags align)
{
    a.load(m, align);
    b.load(m + AVX2::ushort_v::Size, align);
    Vc::AVX2::deinterleave(a, b);
}

// only support M == V::EntryType -> no specialization
template <typename T, typename M, typename Flags>
Vc_ALWAYS_INLINE void deinterleave(AVX2::Vector<T> &Vc_RESTRICT a,
                                   AVX2::Vector<T> &Vc_RESTRICT b,
                                   AVX2::Vector<T> &Vc_RESTRICT c,
                                   const M *Vc_RESTRICT memory, Flags align)
{
    using V = AVX2::Vector<T>;
    a.load(&memory[0 * V::Size], align);
    b.load(&memory[1 * V::Size], align);
    c.load(&memory[2 * V::Size], align);
    Vc::AVX2::deinterleave(a, b, c);
}

} // namespace Detail
} // namespace Vc

(File diff suppressed because it is too large)

@@ -0,0 +1,119 @@
/* This file is part of the Vc library. {{{
Copyright © 2011-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_AVX_HELPERIMPL_H_
#define VC_AVX_HELPERIMPL_H_

#include "../sse/helperimpl.h"
#include "macros.h"

namespace Vc_VERSIONED_NAMESPACE
{
namespace Detail
{
template <typename A>
inline void deinterleave(AVX2::float_v &, AVX2::float_v &, const float *, A);
template <typename A>
inline void deinterleave(AVX2::float_v &, AVX2::float_v &, const short *, A);
template <typename A>
inline void deinterleave(AVX2::float_v &, AVX2::float_v &, const ushort *, A);
template <typename A>
inline void deinterleave(AVX2::double_v &, AVX2::double_v &, const double *, A);
template <typename A>
inline void deinterleave(AVX2::int_v &, AVX2::int_v &, const int *, A);
template <typename A>
inline void deinterleave(AVX2::int_v &, AVX2::int_v &, const short *, A);
template <typename A>
inline void deinterleave(AVX2::uint_v &, AVX2::uint_v &, const uint *, A);
template <typename A>
inline void deinterleave(AVX2::uint_v &, AVX2::uint_v &, const ushort *, A);
template <typename A>
inline void deinterleave(AVX2::short_v &, AVX2::short_v &, const short *, A);
template <typename A>
inline void deinterleave(AVX2::ushort_v &, AVX2::ushort_v &, const ushort *, A);

template <typename T, typename M, typename A>
Vc_ALWAYS_INLINE_L void deinterleave(AVX2::Vector<T> &Vc_RESTRICT a,
                                     AVX2::Vector<T> &Vc_RESTRICT b,
                                     AVX2::Vector<T> &Vc_RESTRICT c,
                                     const M *Vc_RESTRICT memory,
                                     A align) Vc_ALWAYS_INLINE_R;
template <typename T, typename M, typename A>
Vc_ALWAYS_INLINE_L void deinterleave(AVX2::Vector<T> &Vc_RESTRICT a,
                                     AVX2::Vector<T> &Vc_RESTRICT b,
                                     AVX2::Vector<T> &Vc_RESTRICT c,
                                     AVX2::Vector<T> &Vc_RESTRICT d,
                                     const M *Vc_RESTRICT memory,
                                     A align) Vc_ALWAYS_INLINE_R;
template <typename T, typename M, typename A>
Vc_ALWAYS_INLINE_L void deinterleave(AVX2::Vector<T> &Vc_RESTRICT a,
                                     AVX2::Vector<T> &Vc_RESTRICT b,
                                     AVX2::Vector<T> &Vc_RESTRICT c,
                                     AVX2::Vector<T> &Vc_RESTRICT d,
                                     AVX2::Vector<T> &Vc_RESTRICT e,
                                     const M *Vc_RESTRICT memory,
                                     A align) Vc_ALWAYS_INLINE_R;
template <typename T, typename M, typename A>
Vc_ALWAYS_INLINE_L void deinterleave(
    AVX2::Vector<T> &Vc_RESTRICT a, AVX2::Vector<T> &Vc_RESTRICT b,
    AVX2::Vector<T> &Vc_RESTRICT c, AVX2::Vector<T> &Vc_RESTRICT d,
    AVX2::Vector<T> &Vc_RESTRICT e, AVX2::Vector<T> &Vc_RESTRICT f,
    const M *Vc_RESTRICT memory, A align) Vc_ALWAYS_INLINE_R;
template <typename T, typename M, typename A>
Vc_ALWAYS_INLINE_L void deinterleave(
    AVX2::Vector<T> &Vc_RESTRICT a, AVX2::Vector<T> &Vc_RESTRICT b,
    AVX2::Vector<T> &Vc_RESTRICT c, AVX2::Vector<T> &Vc_RESTRICT d,
    AVX2::Vector<T> &Vc_RESTRICT e, AVX2::Vector<T> &Vc_RESTRICT f,
    AVX2::Vector<T> &Vc_RESTRICT g, AVX2::Vector<T> &Vc_RESTRICT h,
    const M *Vc_RESTRICT memory, A align) Vc_ALWAYS_INLINE_R;

Vc_ALWAYS_INLINE void prefetchForOneRead(const void *addr, VectorAbi::Avx)
{
    prefetchForOneRead(addr, VectorAbi::Sse());
}
Vc_ALWAYS_INLINE void prefetchForModify(const void *addr, VectorAbi::Avx)
{
    prefetchForModify(addr, VectorAbi::Sse());
}
Vc_ALWAYS_INLINE void prefetchClose(const void *addr, VectorAbi::Avx)
{
    prefetchClose(addr, VectorAbi::Sse());
}
Vc_ALWAYS_INLINE void prefetchMid(const void *addr, VectorAbi::Avx)
{
    prefetchMid(addr, VectorAbi::Sse());
}
Vc_ALWAYS_INLINE void prefetchFar(const void *addr, VectorAbi::Avx)
{
    prefetchFar(addr, VectorAbi::Sse());
}
} // namespace Detail
} // namespace Vc

#include "deinterleave.tcc"

#endif // VC_AVX_HELPERIMPL_H_

@@ -0,0 +1,670 @@
/* This file is part of the Vc library. {{{
Copyright © 2009-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_AVX_INTRINSICS_H_
#define VC_AVX_INTRINSICS_H_

#include "../global.h"
#include "../traits/type_traits.h"

// see comment in sse/intrinsics.h
extern "C" {
// AVX
#include <immintrin.h>

#if (defined(Vc_IMPL_XOP) || defined(Vc_IMPL_FMA4)) && !defined(Vc_MSVC)
#include <x86intrin.h>
#endif
}

#include "../common/fix_clang_emmintrin.h"

#include "const_data.h"
#include "../common/types.h"
#include "macros.h"
#include <cstdlib>

#if (defined Vc_CLANG && Vc_CLANG >= 0x30900 && Vc_CLANG < 0x70000)
#ifdef _mm256_permute2f128_si256
#undef _mm256_permute2f128_si256
#define _mm256_permute2f128_si256(V1, V2, M) __extension__ ({ \
    (__m256i)__builtin_ia32_vperm2f128_si256((__v8si)(__m256i)(V1), \
                                             (__v8si)(__m256i)(V2), (char)(M)); })
#endif

#ifdef _mm256_permute2f128_ps
#undef _mm256_permute2f128_ps
#define _mm256_permute2f128_ps(V1, V2, M) __extension__ ({ \
    (__m256)__builtin_ia32_vperm2f128_ps256((__v8sf)(__m256)(V1), \
                                            (__v8sf)(__m256)(V2), (char)(M)); })
#endif

#ifdef _mm256_permute2x128_si256
#undef _mm256_permute2x128_si256
#define _mm256_permute2x128_si256(V1, V2, M) __extension__ ({ \
    (__m256i)__builtin_ia32_permti256((__m256i)(V1), (__m256i)(V2), (char)(M)); })
#endif
#endif

namespace Vc_VERSIONED_NAMESPACE
{
namespace AvxIntrinsics
{
    using AVX::c_general;
    using AVX::_IndexesFromZero32;
    using AVX::_IndexesFromZero16;
    using AVX::_IndexesFromZero8;

    typedef __m128  m128 ;
    typedef __m128d m128d;
    typedef __m128i m128i;
    typedef __m256  m256 ;
    typedef __m256d m256d;
    typedef __m256i m256i;

#ifdef Vc_GCC
    // Redefine the mul/add/sub intrinsics to use GCC-specific operators instead of builtin
    // functions. This way the fp-contraction optimization step kicks in and creates FMAs! :)
    static Vc_INTRINSIC Vc_CONST m256d _mm256_mul_pd(m256d a, m256d b) { return static_cast<m256d>(static_cast<__v4df>(a) * static_cast<__v4df>(b)); }
    static Vc_INTRINSIC Vc_CONST m256d _mm256_add_pd(m256d a, m256d b) { return static_cast<m256d>(static_cast<__v4df>(a) + static_cast<__v4df>(b)); }
    static Vc_INTRINSIC Vc_CONST m256d _mm256_sub_pd(m256d a, m256d b) { return static_cast<m256d>(static_cast<__v4df>(a) - static_cast<__v4df>(b)); }
    static Vc_INTRINSIC Vc_CONST m256 _mm256_mul_ps(m256 a, m256 b) { return static_cast<m256>(static_cast<__v8sf>(a) * static_cast<__v8sf>(b)); }
    static Vc_INTRINSIC Vc_CONST m256 _mm256_add_ps(m256 a, m256 b) { return static_cast<m256>(static_cast<__v8sf>(a) + static_cast<__v8sf>(b)); }
    static Vc_INTRINSIC Vc_CONST m256 _mm256_sub_ps(m256 a, m256 b) { return static_cast<m256>(static_cast<__v8sf>(a) - static_cast<__v8sf>(b)); }
#endif
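
#ifdef Vc_GCC
    // Editor's note (not in the original source): a sketch of the contraction
    // effect described in the comment above. Because GCC's vector-extension
    // operators go through the normal expression machinery, a compiler with
    // FMA enabled may fuse the multiply and add below into one vfmadd
    // instruction, which opaque builtin calls would not allow.
    static Vc_INTRINSIC m256 fusedMaddCandidate(m256 a, m256 b, m256 c)
    {
        return a * b + c;   // eligible for fp-contraction into an FMA
    }
#endif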

    static Vc_INTRINSIC m256d Vc_CONST set1_pd (double a) { return _mm256_set1_pd (a); }
    static Vc_INTRINSIC m256i Vc_CONST set1_epi32(int a) { return _mm256_set1_epi32(a); }

    static Vc_INTRINSIC Vc_CONST m128i _mm_setallone_si128() { return _mm_load_si128(reinterpret_cast<const __m128i *>(Common::AllBitsSet)); }
    static Vc_INTRINSIC Vc_CONST m128 _mm_setallone_ps() { return _mm_load_ps(reinterpret_cast<const float *>(Common::AllBitsSet)); }
    static Vc_INTRINSIC Vc_CONST m128d _mm_setallone_pd() { return _mm_load_pd(reinterpret_cast<const double *>(Common::AllBitsSet)); }

    static Vc_INTRINSIC Vc_CONST m256i setallone_si256() { return _mm256_castps_si256(_mm256_load_ps(reinterpret_cast<const float *>(Common::AllBitsSet))); }
    static Vc_INTRINSIC Vc_CONST m256d setallone_pd() { return _mm256_load_pd(reinterpret_cast<const double *>(Common::AllBitsSet)); }
    static Vc_INTRINSIC Vc_CONST m256 setallone_ps() { return _mm256_load_ps(reinterpret_cast<const float *>(Common::AllBitsSet)); }

    static Vc_INTRINSIC m256i Vc_CONST setone_epi8 () { return _mm256_set1_epi8(1); }
    static Vc_INTRINSIC m256i Vc_CONST setone_epu8 () { return setone_epi8(); }
    static Vc_INTRINSIC m256i Vc_CONST setone_epi16() { return _mm256_castps_si256(_mm256_broadcast_ss(reinterpret_cast<const float *>(c_general::one16))); }
    static Vc_INTRINSIC m256i Vc_CONST setone_epu16() { return setone_epi16(); }
    static Vc_INTRINSIC m256i Vc_CONST setone_epi32() { return _mm256_castps_si256(_mm256_broadcast_ss(reinterpret_cast<const float *>(&_IndexesFromZero32[1]))); }
    static Vc_INTRINSIC m256i Vc_CONST setone_epu32() { return setone_epi32(); }

    static Vc_INTRINSIC m256 Vc_CONST setone_ps() { return _mm256_broadcast_ss(&c_general::oneFloat); }
    static Vc_INTRINSIC m256d Vc_CONST setone_pd() { return _mm256_broadcast_sd(&c_general::oneDouble); }

    static Vc_INTRINSIC m256d Vc_CONST setabsmask_pd() { return _mm256_broadcast_sd(reinterpret_cast<const double *>(&c_general::absMaskFloat[0])); }
    static Vc_INTRINSIC m256 Vc_CONST setabsmask_ps() { return _mm256_broadcast_ss(reinterpret_cast<const float *>(&c_general::absMaskFloat[1])); }
    static Vc_INTRINSIC m256d Vc_CONST setsignmask_pd(){ return _mm256_broadcast_sd(reinterpret_cast<const double *>(&c_general::signMaskFloat[0])); }
    static Vc_INTRINSIC m256 Vc_CONST setsignmask_ps(){ return _mm256_broadcast_ss(reinterpret_cast<const float *>(&c_general::signMaskFloat[1])); }

    static Vc_INTRINSIC m256 Vc_CONST set2power31_ps() { return _mm256_broadcast_ss(&c_general::_2power31); }
    static Vc_INTRINSIC m128 Vc_CONST _mm_set2power31_ps() { return _mm_broadcast_ss(&c_general::_2power31); }
    static Vc_INTRINSIC m256i Vc_CONST set2power31_epu32() { return _mm256_castps_si256(_mm256_broadcast_ss(reinterpret_cast<const float *>(&c_general::signMaskFloat[1]))); }
    static Vc_INTRINSIC m128i Vc_CONST _mm_set2power31_epu32() { return _mm_castps_si128(_mm_broadcast_ss(reinterpret_cast<const float *>(&c_general::signMaskFloat[1]))); }

    static Vc_INTRINSIC m256i Vc_CONST setmin_epi8 () { return _mm256_set1_epi8(-0x80); }
    static Vc_INTRINSIC m128i Vc_CONST _mm_setmin_epi16() { return _mm_castps_si128(_mm_broadcast_ss(reinterpret_cast<const float *>(c_general::minShort))); }
    static Vc_INTRINSIC m128i Vc_CONST _mm_setmin_epi32() { return _mm_castps_si128(_mm_broadcast_ss(reinterpret_cast<const float *>(&c_general::signMaskFloat[1]))); }
    static Vc_INTRINSIC m256i Vc_CONST setmin_epi16() { return _mm256_castps_si256(_mm256_broadcast_ss(reinterpret_cast<const float *>(c_general::minShort))); }
    static Vc_INTRINSIC m256i Vc_CONST setmin_epi32() { return _mm256_castps_si256(_mm256_broadcast_ss(reinterpret_cast<const float *>(&c_general::signMaskFloat[1]))); }

    template <int i>
    static Vc_INTRINSIC Vc_CONST unsigned int extract_epu32(__m128i x)
    {
        return _mm_extract_epi32(x, i);
    }

    template <int offset> Vc_INTRINSIC __m256 insert128(__m256 a, __m128 b) { return _mm256_insertf128_ps(a, b, offset); }
    template <int offset> Vc_INTRINSIC __m256d insert128(__m256d a, __m128d b) { return _mm256_insertf128_pd(a, b, offset); }
    template <int offset> Vc_INTRINSIC __m256i insert128(__m256i a, __m128i b) {
#ifdef Vc_IMPL_AVX2
        return _mm256_inserti128_si256(a, b, offset);
#else
        return _mm256_insertf128_si256(a, b, offset);
#endif
    }

    template <int offset> Vc_INTRINSIC __m128 extract128(__m256 a) { return _mm256_extractf128_ps(a, offset); }
    template <int offset> Vc_INTRINSIC __m128d extract128(__m256d a) { return _mm256_extractf128_pd(a, offset); }
    template <int offset> Vc_INTRINSIC __m128i extract128(__m256i a) {
#ifdef Vc_IMPL_AVX2
        return _mm256_extracti128_si256(a, offset);
#else
        return _mm256_extractf128_si256(a, offset);
#endif
    }

    /////////////////////// COMPARE OPS ///////////////////////
#ifdef Vc_GCC
    // GCC needs builtin compare operators to enable constant folding
    Vc_INTRINSIC __m256d cmpeq_pd (__m256d a, __m256d b) { return reinterpret_cast<__m256d>(a == b); }
    Vc_INTRINSIC __m256d cmpneq_pd (__m256d a, __m256d b) { return reinterpret_cast<__m256d>(a != b); }
    Vc_INTRINSIC __m256d cmplt_pd (__m256d a, __m256d b) { return reinterpret_cast<__m256d>(a < b); }
    Vc_INTRINSIC __m256d cmpge_pd (__m256d a, __m256d b) { return reinterpret_cast<__m256d>(a >= b); }
    Vc_INTRINSIC __m256d cmple_pd (__m256d a, __m256d b) { return reinterpret_cast<__m256d>(a <= b); }
    Vc_INTRINSIC __m256d cmpgt_pd (__m256d a, __m256d b) { return reinterpret_cast<__m256d>(a > b); }

    Vc_INTRINSIC __m256 cmpeq_ps (__m256 a, __m256 b) { return reinterpret_cast<__m256 >(a == b); }
    Vc_INTRINSIC __m256 cmpneq_ps (__m256 a, __m256 b) { return reinterpret_cast<__m256 >(a != b); }
    Vc_INTRINSIC __m256 cmplt_ps (__m256 a, __m256 b) { return reinterpret_cast<__m256 >(a < b); }
    Vc_INTRINSIC __m256 cmpge_ps (__m256 a, __m256 b) { return reinterpret_cast<__m256 >(a >= b); }
    Vc_INTRINSIC __m256 cmple_ps (__m256 a, __m256 b) { return reinterpret_cast<__m256 >(a <= b); }
    Vc_INTRINSIC __m256 cmpgt_ps (__m256 a, __m256 b) { return reinterpret_cast<__m256 >(a > b); }
#else
    Vc_INTRINSIC __m256d cmpeq_pd (__m256d a, __m256d b) { return _mm256_cmp_pd(a, b, _CMP_EQ_OQ); }
    Vc_INTRINSIC __m256d cmpneq_pd (__m256d a, __m256d b) { return _mm256_cmp_pd(a, b, _CMP_NEQ_UQ); }
    Vc_INTRINSIC __m256d cmplt_pd (__m256d a, __m256d b) { return _mm256_cmp_pd(a, b, _CMP_LT_OS); }
    Vc_INTRINSIC __m256d cmpge_pd (__m256d a, __m256d b) { return _mm256_cmp_pd(a, b, _CMP_NLT_US); }
    Vc_INTRINSIC __m256d cmple_pd (__m256d a, __m256d b) { return _mm256_cmp_pd(a, b, _CMP_LE_OS); }
    Vc_INTRINSIC __m256d cmpgt_pd (__m256d a, __m256d b) { return _mm256_cmp_pd(a, b, _CMP_NLE_US); }

    Vc_INTRINSIC __m256 cmpeq_ps (__m256 a, __m256 b) { return _mm256_cmp_ps(a, b, _CMP_EQ_OQ); }
    Vc_INTRINSIC __m256 cmpneq_ps (__m256 a, __m256 b) { return _mm256_cmp_ps(a, b, _CMP_NEQ_UQ); }
    Vc_INTRINSIC __m256 cmplt_ps (__m256 a, __m256 b) { return _mm256_cmp_ps(a, b, _CMP_LT_OS); }
    Vc_INTRINSIC __m256 cmpge_ps (__m256 a, __m256 b) { return _mm256_cmp_ps(a, b, _CMP_NLT_US); }
    Vc_INTRINSIC __m256 cmple_ps (__m256 a, __m256 b) { return _mm256_cmp_ps(a, b, _CMP_LE_OS); }
    Vc_INTRINSIC __m256 cmpgt_ps (__m256 a, __m256 b) { return _mm256_cmp_ps(a, b, _CMP_NLE_US); }
#endif
    Vc_INTRINSIC __m256d cmpnlt_pd (__m256d a, __m256d b) { return cmpge_pd(a, b); }
    Vc_INTRINSIC __m256d cmpnle_pd (__m256d a, __m256d b) { return cmpgt_pd(a, b); }
    Vc_INTRINSIC __m256 cmpnlt_ps (__m256 a, __m256 b) { return cmpge_ps(a, b); }
    Vc_INTRINSIC __m256 cmpnle_ps (__m256 a, __m256 b) { return cmpgt_ps(a, b); }

    Vc_INTRINSIC __m256d cmpord_pd (__m256d a, __m256d b) { return _mm256_cmp_pd(a, b, _CMP_ORD_Q); }
    Vc_INTRINSIC __m256d cmpunord_pd(__m256d a, __m256d b) { return _mm256_cmp_pd(a, b, _CMP_UNORD_Q); }
    Vc_INTRINSIC __m256 cmpord_ps (__m256 a, __m256 b) { return _mm256_cmp_ps(a, b, _CMP_ORD_Q); }
    Vc_INTRINSIC __m256 cmpunord_ps(__m256 a, __m256 b) { return _mm256_cmp_ps(a, b, _CMP_UNORD_Q); }

#if defined(Vc_IMPL_XOP)
    static Vc_INTRINSIC m128i cmplt_epu16(__m128i a, __m128i b) {
        return _mm_comlt_epu16(a, b);
    }
    static Vc_INTRINSIC m128i cmpgt_epu16(__m128i a, __m128i b) {
        return _mm_comgt_epu16(a, b);
    }
#else
    static Vc_INTRINSIC m128i cmplt_epu16(__m128i a, __m128i b) {
        return _mm_cmplt_epi16(_mm_xor_si128(a, _mm_setmin_epi16()), _mm_xor_si128(b, _mm_setmin_epi16()));
    }
    static Vc_INTRINSIC m128i cmpgt_epu16(__m128i a, __m128i b) {
        return _mm_cmpgt_epi16(_mm_xor_si128(a, _mm_setmin_epi16()), _mm_xor_si128(b, _mm_setmin_epi16()));
    }
#endif
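
    // Editor's note (not in the original source): the XOR trick above in
    // scalar form. Flipping the sign bit maps unsigned order onto signed
    // order, so a signed compare implements the unsigned one the ISA lacks.
    static inline bool scalarLtEpu16(unsigned short a, unsigned short b)
    {
        const short sa = static_cast<short>(a ^ 0x8000u);   // bias by -2^15
        const short sb = static_cast<short>(b ^ 0x8000u);
        return sa < sb;                                     // equals a < b unsigned
    }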

#ifdef Vc_IMPL_AVX2
    template <int shift> Vc_INTRINSIC Vc_CONST m256i alignr(__m256i s1, __m256i s2)
    {
        return _mm256_alignr_epi8(s1, s2, shift);
    }
#else
    template <int shift> Vc_INTRINSIC Vc_CONST m256i alignr(__m256i s1, __m256i s2)
    {
        return insert128<1>(
            _mm256_castsi128_si256(_mm_alignr_epi8(_mm256_castsi256_si128(s1),
                                                   _mm256_castsi256_si128(s2), shift)),
            _mm_alignr_epi8(extract128<1>(s1), extract128<1>(s2), shift));
    }
#endif

#ifdef Vc_IMPL_AVX2
#define Vc_AVX_TO_SSE_2_NEW(name) \
    Vc_INTRINSIC Vc_CONST m256i name(__m256i a0, __m256i b0) \
    { \
        return _mm256_##name(a0, b0); \
    }
#define Vc_AVX_TO_SSE_256_128(name) \
    Vc_INTRINSIC Vc_CONST m256i name(__m256i a0, __m128i b0) \
    { \
        return _mm256_##name(a0, b0); \
    }
#define Vc_AVX_TO_SSE_1i(name) \
    template <int i> Vc_INTRINSIC Vc_CONST m256i name(__m256i a0) \
    { \
        return _mm256_##name(a0, i); \
    }
#define Vc_AVX_TO_SSE_1(name) \
    Vc_INTRINSIC Vc_CONST __m256i name(__m256i a0) { return _mm256_##name(a0); }
#define Vc_AVX_TO_SSE_1_128(name, shift__) \
    Vc_INTRINSIC Vc_CONST __m256i name(__m128i a0) { return _mm256_##name(a0); }
#else
/**\internal
 * Defines the function \p name, which takes two __m256i arguments and calls `_mm_##name` on the low
 * and high 128-bit halves of the arguments.
 *
 * In case the AVX2 intrinsics are enabled, the arguments are directly passed to a single
 * `_mm256_##name` call.
 */
#define Vc_AVX_TO_SSE_1(name) \
    Vc_INTRINSIC Vc_CONST __m256i name(__m256i a0) \
    { \
        __m128i a1 = extract128<1>(a0); \
        __m128i r0 = _mm_##name(_mm256_castsi256_si128(a0)); \
        __m128i r1 = _mm_##name(a1); \
        return insert128<1>(_mm256_castsi128_si256(r0), r1); \
    }
#define Vc_AVX_TO_SSE_1_128(name, shift__) \
    Vc_INTRINSIC Vc_CONST __m256i name(__m128i a0) \
    { \
        __m128i r0 = _mm_##name(a0); \
        __m128i r1 = _mm_##name(_mm_srli_si128(a0, shift__)); \
        return insert128<1>(_mm256_castsi128_si256(r0), r1); \
    }
#define Vc_AVX_TO_SSE_2_NEW(name) \
    Vc_INTRINSIC Vc_CONST m256i name(__m256i a0, __m256i b0) \
    { \
        m128i a1 = extract128<1>(a0); \
        m128i b1 = extract128<1>(b0); \
        m128i r0 = _mm_##name(_mm256_castsi256_si128(a0), _mm256_castsi256_si128(b0)); \
        m128i r1 = _mm_##name(a1, b1); \
        return insert128<1>(_mm256_castsi128_si256(r0), r1); \
    }
#define Vc_AVX_TO_SSE_256_128(name) \
    Vc_INTRINSIC Vc_CONST m256i name(__m256i a0, __m128i b0) \
    { \
        m128i a1 = extract128<1>(a0); \
        m128i r0 = _mm_##name(_mm256_castsi256_si128(a0), b0); \
        m128i r1 = _mm_##name(a1, b0); \
        return insert128<1>(_mm256_castsi128_si256(r0), r1); \
    }
#define Vc_AVX_TO_SSE_1i(name) \
    template <int i> Vc_INTRINSIC Vc_CONST m256i name(__m256i a0) \
    { \
        m128i a1 = extract128<1>(a0); \
        m128i r0 = _mm_##name(_mm256_castsi256_si128(a0), i); \
        m128i r1 = _mm_##name(a1, i); \
        return insert128<1>(_mm256_castsi128_si256(r0), r1); \
    }
#endif
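
// Editor's note (not in the original source): what Vc_AVX_TO_SSE_2_NEW(add_epi16)
// expands to in the AVX-without-AVX2 branch above, written out by hand:
//
//     Vc_INTRINSIC Vc_CONST m256i add_epi16(__m256i a0, __m256i b0)
//     {
//         m128i a1 = extract128<1>(a0);
//         m128i b1 = extract128<1>(b0);
//         m128i r0 = _mm_add_epi16(_mm256_castsi256_si128(a0),
//                                  _mm256_castsi256_si128(b0));
//         m128i r1 = _mm_add_epi16(a1, b1);
//         return insert128<1>(_mm256_castsi128_si256(r0), r1);
//     }
//
// Each 256-bit operand is split into 128-bit halves, the SSE intrinsic runs
// on each half, and the halves are rejoined.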
|
||||
Vc_INTRINSIC Vc_CONST __m128i sll_epi16(__m128i a, __m128i b) { return _mm_sll_epi16(a, b); }
|
||||
Vc_INTRINSIC Vc_CONST __m128i sll_epi32(__m128i a, __m128i b) { return _mm_sll_epi32(a, b); }
|
||||
Vc_INTRINSIC Vc_CONST __m128i sll_epi64(__m128i a, __m128i b) { return _mm_sll_epi64(a, b); }
|
||||
Vc_INTRINSIC Vc_CONST __m128i srl_epi16(__m128i a, __m128i b) { return _mm_srl_epi16(a, b); }
|
||||
Vc_INTRINSIC Vc_CONST __m128i srl_epi32(__m128i a, __m128i b) { return _mm_srl_epi32(a, b); }
|
||||
Vc_INTRINSIC Vc_CONST __m128i srl_epi64(__m128i a, __m128i b) { return _mm_srl_epi64(a, b); }
|
||||
Vc_INTRINSIC Vc_CONST __m128i sra_epi16(__m128i a, __m128i b) { return _mm_sra_epi16(a, b); }
|
||||
Vc_INTRINSIC Vc_CONST __m128i sra_epi32(__m128i a, __m128i b) { return _mm_sra_epi32(a, b); }
|
||||
|
||||
Vc_AVX_TO_SSE_1i(slli_epi16)
|
||||
Vc_AVX_TO_SSE_1i(slli_epi32)
|
||||
Vc_AVX_TO_SSE_1i(slli_epi64)
|
||||
Vc_AVX_TO_SSE_1i(srai_epi16)
|
||||
Vc_AVX_TO_SSE_1i(srai_epi32)
|
||||
Vc_AVX_TO_SSE_1i(srli_epi16)
|
||||
Vc_AVX_TO_SSE_1i(srli_epi32)
|
||||
Vc_AVX_TO_SSE_1i(srli_epi64)
|
||||
|
||||
Vc_AVX_TO_SSE_256_128(sll_epi16)
|
||||
Vc_AVX_TO_SSE_256_128(sll_epi32)
|
||||
Vc_AVX_TO_SSE_256_128(sll_epi64)
|
||||
Vc_AVX_TO_SSE_256_128(srl_epi16)
|
||||
Vc_AVX_TO_SSE_256_128(srl_epi32)
|
||||
Vc_AVX_TO_SSE_256_128(srl_epi64)
|
||||
Vc_AVX_TO_SSE_256_128(sra_epi16)
|
||||
Vc_AVX_TO_SSE_256_128(sra_epi32)
|
||||
|
||||
Vc_AVX_TO_SSE_2_NEW(cmpeq_epi8)
|
||||
Vc_AVX_TO_SSE_2_NEW(cmpeq_epi16)
|
||||
Vc_AVX_TO_SSE_2_NEW(cmpeq_epi32)
|
||||
Vc_AVX_TO_SSE_2_NEW(cmpeq_epi64)
|
||||
Vc_AVX_TO_SSE_2_NEW(cmpgt_epi8)
|
||||
Vc_AVX_TO_SSE_2_NEW(cmpgt_epi16)
|
||||
Vc_AVX_TO_SSE_2_NEW(cmpgt_epi32)
|
||||
Vc_AVX_TO_SSE_2_NEW(cmpgt_epi64)
|
||||
Vc_AVX_TO_SSE_2_NEW(unpackhi_epi16)
|
||||
Vc_AVX_TO_SSE_2_NEW(unpacklo_epi16)
|
||||
Vc_AVX_TO_SSE_2_NEW(add_epi16)
|
||||
Vc_AVX_TO_SSE_2_NEW(add_epi32)
|
||||
Vc_AVX_TO_SSE_2_NEW(add_epi64)
|
||||
Vc_AVX_TO_SSE_2_NEW(sub_epi16)
|
||||
Vc_AVX_TO_SSE_2_NEW(sub_epi32)
|
||||
Vc_AVX_TO_SSE_2_NEW(mullo_epi16)
|
||||
Vc_AVX_TO_SSE_2_NEW(sign_epi16)
|
||||
Vc_AVX_TO_SSE_2_NEW(sign_epi32)
|
||||
Vc_AVX_TO_SSE_2_NEW(min_epi8)
|
||||
Vc_AVX_TO_SSE_2_NEW(max_epi8)
|
||||
Vc_AVX_TO_SSE_2_NEW(min_epu16)
|
||||
Vc_AVX_TO_SSE_2_NEW(max_epu16)
|
||||
Vc_AVX_TO_SSE_2_NEW(min_epi32)
|
||||
Vc_AVX_TO_SSE_2_NEW(max_epi32)
|
||||
Vc_AVX_TO_SSE_2_NEW(min_epu32)
|
||||
Vc_AVX_TO_SSE_2_NEW(max_epu32)
|
||||
Vc_AVX_TO_SSE_2_NEW(mullo_epi32)
|
||||
|
||||
Vc_AVX_TO_SSE_1(abs_epi8)
|
||||
Vc_AVX_TO_SSE_1(abs_epi16)
|
||||
Vc_AVX_TO_SSE_1(abs_epi32)
|
||||
Vc_AVX_TO_SSE_1_128(cvtepi8_epi16, 8)
|
||||
Vc_AVX_TO_SSE_1_128(cvtepi8_epi32, 4)
|
||||
Vc_AVX_TO_SSE_1_128(cvtepi8_epi64, 2)
|
||||
Vc_AVX_TO_SSE_1_128(cvtepi16_epi32, 8)
|
||||
Vc_AVX_TO_SSE_1_128(cvtepi16_epi64, 4)
|
||||
Vc_AVX_TO_SSE_1_128(cvtepi32_epi64, 8)
|
||||
Vc_AVX_TO_SSE_1_128(cvtepu8_epi16, 8)
|
||||
Vc_AVX_TO_SSE_1_128(cvtepu8_epi32, 4)
|
||||
Vc_AVX_TO_SSE_1_128(cvtepu8_epi64, 2)
|
||||
Vc_AVX_TO_SSE_1_128(cvtepu16_epi32, 8)
|
||||
Vc_AVX_TO_SSE_1_128(cvtepu16_epi64, 4)
|
||||
Vc_AVX_TO_SSE_1_128(cvtepu32_epi64, 8)
|
||||
#ifndef Vc_IMPL_AVX2
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
// implementation of the intrinsics missing in AVX
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static Vc_INTRINSIC m256i Vc_CONST and_si256(__m256i x, __m256i y) {
|
||||
return _mm256_castps_si256(_mm256_and_ps(_mm256_castsi256_ps(x), _mm256_castsi256_ps(y)));
|
||||
}
|
||||
static Vc_INTRINSIC m256i Vc_CONST andnot_si256(__m256i x, __m256i y) {
|
||||
return _mm256_castps_si256(_mm256_andnot_ps(_mm256_castsi256_ps(x), _mm256_castsi256_ps(y)));
|
||||
}
|
||||
static Vc_INTRINSIC m256i Vc_CONST or_si256(__m256i x, __m256i y) {
|
||||
return _mm256_castps_si256(_mm256_or_ps(_mm256_castsi256_ps(x), _mm256_castsi256_ps(y)));
|
||||
}
|
||||
static Vc_INTRINSIC m256i Vc_CONST xor_si256(__m256i x, __m256i y) {
|
||||
return _mm256_castps_si256(_mm256_xor_ps(_mm256_castsi256_ps(x), _mm256_castsi256_ps(y)));
|
||||
}
|
||||
|
||||
Vc_INTRINSIC Vc_CONST int movemask_epi8(__m256i a0)
|
||||
{
|
||||
m128i a1 = extract128<1>(a0);
|
||||
return (_mm_movemask_epi8(a1) << 16) | _mm_movemask_epi8(_mm256_castsi256_si128(a0));
|
||||
}
|
||||
template <int m> Vc_INTRINSIC Vc_CONST m256i blend_epi16(__m256i a0, __m256i b0)
|
||||
{
|
||||
m128i a1 = extract128<1>(a0);
|
||||
m128i b1 = extract128<1>(b0);
|
||||
m128i r0 = _mm_blend_epi16(_mm256_castsi256_si128(a0), _mm256_castsi256_si128(b0), m & 0xff);
|
||||
m128i r1 = _mm_blend_epi16(a1, b1, m >> 8);
|
||||
return insert128<1>(_mm256_castsi128_si256(r0), r1);
|
||||
}
|
||||
Vc_INTRINSIC Vc_CONST m256i blendv_epi8(__m256i a0, __m256i b0, __m256i m0) {
|
||||
m128i a1 = extract128<1>(a0);
|
||||
m128i b1 = extract128<1>(b0);
|
||||
m128i m1 = extract128<1>(m0);
|
||||
m128i r0 = _mm_blendv_epi8(_mm256_castsi256_si128(a0), _mm256_castsi256_si128(b0), _mm256_castsi256_si128(m0));
|
||||
m128i r1 = _mm_blendv_epi8(a1, b1, m1);
|
||||
return insert128<1>(_mm256_castsi128_si256(r0), r1);
|
||||
}
|
||||
// mpsadbw_epu8 (__m128i __X, __m128i __Y, const int __M)
|
||||
|
||||
#else // Vc_IMPL_AVX2
|
||||
|
||||
static Vc_INTRINSIC Vc_CONST m256i xor_si256(__m256i x, __m256i y) { return _mm256_xor_si256(x, y); }
|
||||
static Vc_INTRINSIC Vc_CONST m256i or_si256(__m256i x, __m256i y) { return _mm256_or_si256(x, y); }
|
||||
static Vc_INTRINSIC Vc_CONST m256i and_si256(__m256i x, __m256i y) { return _mm256_and_si256(x, y); }
|
||||
static Vc_INTRINSIC Vc_CONST m256i andnot_si256(__m256i x, __m256i y) { return _mm256_andnot_si256(x, y); }
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
// implementation of the intrinsics missing in AVX2
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
Vc_INTRINSIC Vc_CONST m256i blendv_epi8(__m256i a0, __m256i b0, __m256i m0)
|
||||
{
|
||||
return _mm256_blendv_epi8(a0, b0, m0);
|
||||
}
|
||||
Vc_INTRINSIC Vc_CONST int movemask_epi8(__m256i a0)
|
||||
{
|
||||
return _mm256_movemask_epi8(a0);
|
||||
}
|
||||
|
||||
#endif // Vc_IMPL_AVX2
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
// implementation of intrinsics missing in AVX and AVX2
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static Vc_INTRINSIC m256i cmplt_epi64(__m256i a, __m256i b) {
|
||||
return cmpgt_epi64(b, a);
|
||||
}
|
||||
static Vc_INTRINSIC m256i cmplt_epi32(__m256i a, __m256i b) {
|
||||
return cmpgt_epi32(b, a);
|
||||
}
|
||||
static Vc_INTRINSIC m256i cmplt_epi16(__m256i a, __m256i b) {
|
||||
return cmpgt_epi16(b, a);
|
||||
}
|
||||
static Vc_INTRINSIC m256i cmplt_epi8(__m256i a, __m256i b) {
|
||||
return cmpgt_epi8(b, a);
|
||||
}
|
||||
|
||||
static Vc_INTRINSIC m256i cmpgt_epu8(__m256i a, __m256i b) {
|
||||
return cmpgt_epi8(xor_si256(a, setmin_epi8()), xor_si256(b, setmin_epi8()));
|
||||
}
|
||||
#if defined(Vc_IMPL_XOP)
|
||||
Vc_AVX_TO_SSE_2_NEW(comlt_epu32)
|
||||
Vc_AVX_TO_SSE_2_NEW(comgt_epu32)
|
||||
Vc_AVX_TO_SSE_2_NEW(comlt_epu16)
|
||||
Vc_AVX_TO_SSE_2_NEW(comgt_epu16)
|
||||
static Vc_INTRINSIC m256i Vc_CONST cmplt_epu32(__m256i a, __m256i b) { return comlt_epu32(a, b); }
|
||||
static Vc_INTRINSIC m256i Vc_CONST cmpgt_epu32(__m256i a, __m256i b) { return comgt_epu32(a, b); }
|
||||
static Vc_INTRINSIC m256i Vc_CONST cmplt_epu16(__m256i a, __m256i b) { return comlt_epu16(a, b); }
|
||||
static Vc_INTRINSIC m256i Vc_CONST cmpgt_epu16(__m256i a, __m256i b) { return comgt_epu16(a, b); }
|
||||
#else
|
||||
static Vc_INTRINSIC m256i Vc_CONST cmplt_epu32(__m256i _a, __m256i _b) {
|
||||
m256i a = _mm256_castps_si256(_mm256_xor_ps(_mm256_castsi256_ps(_a), _mm256_castsi256_ps(setmin_epi32())));
|
||||
m256i b = _mm256_castps_si256(_mm256_xor_ps(_mm256_castsi256_ps(_b), _mm256_castsi256_ps(setmin_epi32())));
|
||||
return cmplt_epi32(a, b);
|
||||
}
|
||||
static Vc_INTRINSIC m256i Vc_CONST cmpgt_epu32(__m256i _a, __m256i _b) {
|
||||
m256i a = _mm256_castps_si256(_mm256_xor_ps(_mm256_castsi256_ps(_a), _mm256_castsi256_ps(setmin_epi32())));
|
||||
m256i b = _mm256_castps_si256(_mm256_xor_ps(_mm256_castsi256_ps(_b), _mm256_castsi256_ps(setmin_epi32())));
|
||||
return cmpgt_epi32(a, b);
|
||||
}
|
||||
static Vc_INTRINSIC m256i Vc_CONST cmplt_epu16(__m256i _a, __m256i _b) {
|
||||
m256i a = _mm256_castps_si256(_mm256_xor_ps(_mm256_castsi256_ps(_a), _mm256_castsi256_ps(setmin_epi16())));
|
||||
m256i b = _mm256_castps_si256(_mm256_xor_ps(_mm256_castsi256_ps(_b), _mm256_castsi256_ps(setmin_epi16())));
|
||||
return cmplt_epi16(a, b);
|
||||
}
|
||||
static Vc_INTRINSIC m256i Vc_CONST cmpgt_epu16(__m256i _a, __m256i _b) {
|
||||
m256i a = _mm256_castps_si256(_mm256_xor_ps(_mm256_castsi256_ps(_a), _mm256_castsi256_ps(setmin_epi16())));
|
||||
m256i b = _mm256_castps_si256(_mm256_xor_ps(_mm256_castsi256_ps(_b), _mm256_castsi256_ps(setmin_epi16())));
|
||||
return cmpgt_epi16(a, b);
|
||||
}
|
||||
#endif
|
||||
|
||||
static Vc_INTRINSIC void _mm256_maskstore(float *mem, const __m256 mask, const __m256 v) {
|
||||
_mm256_maskstore_ps(mem, _mm256_castps_si256(mask), v);
|
||||
}
|
||||
static Vc_INTRINSIC void _mm256_maskstore(double *mem, const __m256d mask, const __m256d v) {
|
||||
_mm256_maskstore_pd(mem, _mm256_castpd_si256(mask), v);
|
||||
}
|
||||
static Vc_INTRINSIC void _mm256_maskstore(int *mem, const __m256i mask, const __m256i v) {
|
||||
#ifdef Vc_IMPL_AVX2
|
||||
_mm256_maskstore_epi32(mem, mask, v);
|
||||
#else
|
||||
_mm256_maskstore_ps(reinterpret_cast<float *>(mem), mask, _mm256_castsi256_ps(v));
|
||||
#endif
|
||||
}
|
||||
static Vc_INTRINSIC void _mm256_maskstore(unsigned int *mem, const __m256i mask, const __m256i v) {
|
||||
_mm256_maskstore(reinterpret_cast<int *>(mem), mask, v);
|
||||
}
|
||||
static Vc_INTRINSIC void _mm256_maskstore(short *mem, const __m256i mask, const __m256i v) {
|
||||
using namespace AVX;
|
||||
_mm_maskmoveu_si128(_mm256_castsi256_si128(v), _mm256_castsi256_si128(mask), reinterpret_cast<char *>(&mem[0]));
|
||||
_mm_maskmoveu_si128(extract128<1>(v), extract128<1>(mask), reinterpret_cast<char *>(&mem[8]));
|
||||
}
|
||||
static Vc_INTRINSIC void _mm256_maskstore(unsigned short *mem, const __m256i mask, const __m256i v) {
|
||||
_mm256_maskstore(reinterpret_cast<short *>(mem), mask, v);
|
||||
}

#undef Vc_AVX_TO_SSE_1
#undef Vc_AVX_TO_SSE_1_128
#undef Vc_AVX_TO_SSE_2_NEW
#undef Vc_AVX_TO_SSE_256_128
#undef Vc_AVX_TO_SSE_1i

template<typename R> Vc_INTRINSIC_L R stream_load(const float *mem) Vc_INTRINSIC_R;
template<> Vc_INTRINSIC m128 stream_load<m128>(const float *mem)
{
    return _mm_castsi128_ps(_mm_stream_load_si128(reinterpret_cast<__m128i *>(const_cast<float *>(mem))));
}
template<> Vc_INTRINSIC m256 stream_load<m256>(const float *mem)
{
    return insert128<1>(_mm256_castps128_ps256(stream_load<m128>(mem)),
                        stream_load<m128>(mem + 4));
}

template<typename R> Vc_INTRINSIC_L R stream_load(const double *mem) Vc_INTRINSIC_R;
template<> Vc_INTRINSIC m128d stream_load<m128d>(const double *mem)
{
    return _mm_castsi128_pd(_mm_stream_load_si128(reinterpret_cast<__m128i *>(const_cast<double *>(mem))));
}
template<> Vc_INTRINSIC m256d stream_load<m256d>(const double *mem)
{
    return insert128<1>(_mm256_castpd128_pd256(stream_load<m128d>(mem)),
                        stream_load<m128d>(mem + 2));
}

template<typename R> Vc_INTRINSIC_L R stream_load(const void *mem) Vc_INTRINSIC_R;
template<> Vc_INTRINSIC m128i stream_load<m128i>(const void *mem)
{
    return _mm_stream_load_si128(reinterpret_cast<__m128i *>(const_cast<void *>(mem)));
}
template<> Vc_INTRINSIC m256i stream_load<m256i>(const void *mem)
{
    return insert128<1>(_mm256_castsi128_si256(stream_load<m128i>(mem)),
                        stream_load<m128i>(static_cast<const __m128i *>(mem) + 1));
}
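// stream_load wraps MOVNTDQA (SSE4.1). The source must be 16-byte aligned per
// 128-bit half; the non-temporal hint only pays off on write-combining memory.
// Hedged sketch (illustration only):
//
//   alignas(32) const float src[8] = {0, 1, 2, 3, 4, 5, 6, 7};
//   m256 v = stream_load<m256>(src);  // two MOVNTDQA loads concatenated via insert128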

Vc_INTRINSIC void stream_store(float *mem, __m128 value, __m128 mask)
{
    _mm_maskmoveu_si128(_mm_castps_si128(value), _mm_castps_si128(mask), reinterpret_cast<char *>(mem));
}
Vc_INTRINSIC void stream_store(float *mem, __m256 value, __m256 mask)
{
    stream_store(mem, _mm256_castps256_ps128(value), _mm256_castps256_ps128(mask));
    stream_store(mem + 4, extract128<1>(value), extract128<1>(mask));
}
Vc_INTRINSIC void stream_store(double *mem, __m128d value, __m128d mask)
{
    _mm_maskmoveu_si128(_mm_castpd_si128(value), _mm_castpd_si128(mask), reinterpret_cast<char *>(mem));
}
Vc_INTRINSIC void stream_store(double *mem, __m256d value, __m256d mask)
{
    stream_store(mem, _mm256_castpd256_pd128(value), _mm256_castpd256_pd128(mask));
    stream_store(mem + 2, extract128<1>(value), extract128<1>(mask));
}
Vc_INTRINSIC void stream_store(void *mem, __m128i value, __m128i mask)
{
    _mm_maskmoveu_si128(value, mask, reinterpret_cast<char *>(mem));
}
Vc_INTRINSIC void stream_store(void *mem, __m256i value, __m256i mask)
{
    stream_store(mem, _mm256_castsi256_si128(value), _mm256_castsi256_si128(mask));
    stream_store(static_cast<__m128i *>(mem) + 1, extract128<1>(value), extract128<1>(mask));
}
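// stream_store builds on MASKMOVDQU, a byte-granular non-temporal masked store
// that bypasses the cache. Hedged sketch (illustration only):
//
//   alignas(32) float dst[8] = {};
//   __m256 v    = _mm256_set1_ps(2.f);
//   __m256 mask = _mm256_castsi256_ps(_mm256_set1_epi32(-1));  // all bytes selected
//   stream_store(dst, v, mask);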

#ifndef __x86_64__
Vc_INTRINSIC Vc_PURE __m128i _mm_cvtsi64_si128(int64_t x) {
    return _mm_castpd_si128(_mm_load_sd(reinterpret_cast<const double *>(&x)));
}
#endif

#ifdef Vc_IMPL_AVX2
template <int Scale> __m256 gather(const float *addr, __m256i idx)
{
    return _mm256_i32gather_ps(addr, idx, Scale);
}
template <int Scale> __m256d gather(const double *addr, __m128i idx)
{
    return _mm256_i32gather_pd(addr, idx, Scale);
}
template <int Scale> __m256i gather(const int *addr, __m256i idx)
{
    return _mm256_i32gather_epi32(addr, idx, Scale);
}
template <int Scale> __m256i gather(const unsigned *addr, __m256i idx)
{
    return _mm256_i32gather_epi32(aliasing_cast<int>(addr), idx, Scale);
}

template <int Scale> __m256 gather(__m256 src, __m256 k, const float *addr, __m256i idx)
{
    return _mm256_mask_i32gather_ps(src, addr, idx, k, Scale);
}
template <int Scale>
__m256d gather(__m256d src, __m256d k, const double *addr, __m128i idx)
{
    return _mm256_mask_i32gather_pd(src, addr, idx, k, Scale);
}
template <int Scale> __m256i gather(__m256i src, __m256i k, const int *addr, __m256i idx)
{
    return _mm256_mask_i32gather_epi32(src, addr, idx, k, Scale);
}
template <int Scale>
__m256i gather(__m256i src, __m256i k, const unsigned *addr, __m256i idx)
{
    return _mm256_mask_i32gather_epi32(src, aliasing_cast<int>(addr), idx, k, Scale);
}
#endif
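// The gather wrappers forward to the AVX2 i32gather intrinsics; Scale is the byte
// stride multiplied into each index, so 4 fits float/int tables. Hedged sketch:
//
//   alignas(32) const float table[8] = {0, 10, 20, 30, 40, 50, 60, 70};
//   __m256i idx = _mm256_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0);
//   __m256  v   = gather<4>(table, idx);  // the table, reversed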

} // namespace AvxIntrinsics
} // namespace Vc

namespace Vc_VERSIONED_NAMESPACE
{
namespace AVX
{
    using namespace AvxIntrinsics;
} // namespace AVX
namespace AVX2
{
    using namespace AvxIntrinsics;
} // namespace AVX2
namespace AVX
{
template<typename T> struct VectorTypeHelper;
template<> struct VectorTypeHelper<         char  > { typedef __m256i Type; };
template<> struct VectorTypeHelper<  signed char  > { typedef __m256i Type; };
template<> struct VectorTypeHelper<unsigned char  > { typedef __m256i Type; };
template<> struct VectorTypeHelper<         short > { typedef __m256i Type; };
template<> struct VectorTypeHelper<unsigned short > { typedef __m256i Type; };
template<> struct VectorTypeHelper<         int   > { typedef __m256i Type; };
template<> struct VectorTypeHelper<unsigned int   > { typedef __m256i Type; };
template<> struct VectorTypeHelper<         long  > { typedef __m256i Type; };
template<> struct VectorTypeHelper<unsigned long  > { typedef __m256i Type; };
template<> struct VectorTypeHelper<         long long> { typedef __m256i Type; };
template<> struct VectorTypeHelper<unsigned long long> { typedef __m256i Type; };
template<> struct VectorTypeHelper<         float > { typedef __m256  Type; };
template<> struct VectorTypeHelper<         double> { typedef __m256d Type; };

template <typename T>
using IntegerVectorType =
    typename std::conditional<sizeof(T) == 16, __m128i, __m256i>::type;
template <typename T>
using DoubleVectorType =
    typename std::conditional<sizeof(T) == 16, __m128d, __m256d>::type;
template <typename T>
using FloatVectorType =
    typename std::conditional<sizeof(T) == 16, __m128, __m256>::type;
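// These helpers map a scalar T (or a register's size) onto the raw AVX types.
// A hedged compile-time sanity check (assumes <type_traits> is available and
// that the names are used inside namespace Vc::AVX):
//
//   static_assert(std::is_same<VectorTypeHelper<int>::Type, __m256i>::value, "");
//   static_assert(std::is_same<FloatVectorType<__m128i>, __m128>::value, "");
//   static_assert(std::is_same<IntegerVectorType<__m256>, __m256i>::value, "");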

template<typename T> struct VectorHelper {};
template<typename T> struct VectorHelperSize;
} // namespace AVX
} // namespace Vc

#endif // VC_AVX_INTRINSICS_H_

@ -0,0 +1,87 @@
/* This file is part of the Vc library. {{{
Copyright © 2009-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_AVX_LIMITS_H_
#define VC_AVX_LIMITS_H_

#include "intrinsics.h"
#include "types.h"
#include "macros.h"

namespace std
{
#define Vc_NUM_LIM(T, _max, _min) \
template <> struct numeric_limits<Vc::AVX2::Vector<T>> : public numeric_limits<T> { \
    static Vc_INTRINSIC Vc_CONST Vc::AVX2::Vector<T> max() Vc_NOEXCEPT \
    { \
        return _max; \
    } \
    static Vc_INTRINSIC Vc_CONST Vc::AVX2::Vector<T> min() Vc_NOEXCEPT \
    { \
        return _min; \
    } \
    static Vc_INTRINSIC Vc_CONST Vc::AVX2::Vector<T> lowest() Vc_NOEXCEPT \
    { \
        return min(); \
    } \
    static Vc_INTRINSIC Vc_CONST Vc::AVX2::Vector<T> epsilon() Vc_NOEXCEPT \
    { \
        return Vc::AVX2::Vector<T>::Zero(); \
    } \
    static Vc_INTRINSIC Vc_CONST Vc::AVX2::Vector<T> round_error() Vc_NOEXCEPT \
    { \
        return Vc::AVX2::Vector<T>::Zero(); \
    } \
    static Vc_INTRINSIC Vc_CONST Vc::AVX2::Vector<T> infinity() Vc_NOEXCEPT \
    { \
        return Vc::AVX2::Vector<T>::Zero(); \
    } \
    static Vc_INTRINSIC Vc_CONST Vc::AVX2::Vector<T> quiet_NaN() Vc_NOEXCEPT \
    { \
        return Vc::AVX2::Vector<T>::Zero(); \
    } \
    static Vc_INTRINSIC Vc_CONST Vc::AVX2::Vector<T> signaling_NaN() Vc_NOEXCEPT \
    { \
        return Vc::AVX2::Vector<T>::Zero(); \
    } \
    static Vc_INTRINSIC Vc_CONST Vc::AVX2::Vector<T> denorm_min() Vc_NOEXCEPT \
    { \
        return Vc::AVX2::Vector<T>::Zero(); \
    } \
}

#ifdef Vc_IMPL_AVX2
Vc_NUM_LIM(unsigned short, Vc::Detail::allone<__m256i>(), Vc::Detail::zero<__m256i>());
Vc_NUM_LIM(         short, _mm256_srli_epi16(Vc::Detail::allone<__m256i>(), 1), Vc::AVX::setmin_epi16());
Vc_NUM_LIM(  unsigned int, Vc::Detail::allone<__m256i>(), Vc::Detail::zero<__m256i>());
Vc_NUM_LIM(           int, _mm256_srli_epi32(Vc::Detail::allone<__m256i>(), 1), Vc::AVX::setmin_epi32());
#endif
#undef Vc_NUM_LIM
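// With these specializations std::numeric_limits works directly on Vc vector
// types. Hedged sketch (requires an AVX2 build, where AVX2::Vector<int> is native):
//
//   using V = Vc::AVX2::Vector<int>;
//   V hi = std::numeric_limits<V>::max();  // every lane holds INT_MAX
//   V lo = std::numeric_limits<V>::min();  // every lane holds INT_MIN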

} // namespace std

#endif // VC_AVX_LIMITS_H_

@ -0,0 +1,33 @@
/* This file is part of the Vc library. {{{
Copyright © 2009-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#include "../common/macros.h"

#ifndef VC_AVX_MACROS_H_
#define VC_AVX_MACROS_H_

#endif // VC_AVX_MACROS_H_

@ -0,0 +1,235 @@
/* This file is part of the Vc library. {{{
Copyright © 2009-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_AVX_MASK_H_
#define VC_AVX_MASK_H_

#include <array>

#include "intrinsics.h"
#include "../common/storage.h"
#include "../common/bitscanintrinsics.h"
#include "../common/maskbool.h"
#include "detail.h"
#include "macros.h"

namespace Vc_VERSIONED_NAMESPACE
{
template <typename T> class Mask<T, VectorAbi::Avx>
{
public:
    using abi = VectorAbi::Avx;

    /**
     * The \c EntryType of masks is always bool, independent of \c T.
     */
    typedef bool EntryType;
    using value_type = EntryType;

    using MaskBool = Common::MaskBool<sizeof(T)>;
    /**
     * The \c VectorEntryType, in contrast to \c EntryType, reveals information about the SIMD
     * implementation. This type is useful for the \c sizeof operator in generic functions.
     */
    using VectorEntryType = MaskBool;

    /**
     * The associated Vector<T> type.
     */
    using Vector = AVX2::Vector<T>;

    ///\internal
    using VectorTypeF = AVX::FloatVectorType<typename AVX::VectorTypeHelper<T>::Type>;
    ///\internal
    using VectorTypeD = AVX::DoubleVectorType<VectorTypeF>;
    ///\internal
    using VectorTypeI = AVX::IntegerVectorType<VectorTypeF>;

private:
    typedef const VectorTypeF VArg;
    typedef const VectorTypeD VdArg;
    typedef const VectorTypeI ViArg;

public:
    static constexpr size_t Size = sizeof(VectorTypeF) / sizeof(T);
    static constexpr size_t MemoryAlignment = Size;
    static constexpr std::size_t size() { return Size; }
    Vc_FREE_STORE_OPERATORS_ALIGNED(alignof(VectorType));

private:
    typedef Common::Storage<T, Size> Storage;

public:
    /**
     * The \c VectorType reveals the implementation-specific internal type used for the
     * SIMD type.
     */
    using VectorType = typename Storage::VectorType;

    using EntryReference = Vc::Detail::ElementReference<Mask>;
    using reference = EntryReference;

    // Abstracts how Masks are passed to functions; it can easily be changed to a const ref here.
#if defined Vc_MSVC && defined _WIN32
    typedef const Mask &AsArg;
#else
    typedef const Mask AsArg;
#endif

    Vc_INTRINSIC Mask() {}
    Vc_INTRINSIC Mask(VArg x) : d(AVX::avx_cast<VectorType>(x)) {}
    Vc_INTRINSIC Mask(VdArg x) : d(AVX::avx_cast<VectorType>(x)) {}
    Vc_INTRINSIC Mask(ViArg x) : d(AVX::avx_cast<VectorType>(x)) {}
    Vc_INTRINSIC explicit Mask(VectorSpecialInitializerZero) : d(Detail::zero<VectorType>()) {}
    Vc_INTRINSIC explicit Mask(VectorSpecialInitializerOne) : d(Detail::allone<VectorType>()) {}
    Vc_INTRINSIC explicit Mask(bool b)
        : d(b ? Detail::allone<VectorType>() : Detail::zero<VectorType>())
    {
    }
    Vc_INTRINSIC static Mask Zero() { return Mask{Vc::Zero}; }
    Vc_INTRINSIC static Mask One() { return Mask{Vc::One}; }

    // implicit cast
    template <typename U>
    Vc_INTRINSIC Mask(
        U &&rhs, Common::enable_if_mask_converts_implicitly<Mask, T, U> = nullarg)
        : d(AVX::avx_cast<VectorType>(
              Detail::mask_cast<Traits::decay<U>::Size, Size, VectorTypeF>(
                  rhs.dataI())))
    {
    }

#if Vc_IS_VERSION_1
    // explicit cast, implemented via simd_cast (in avx/simd_cast_caller.h)
    template <typename U>
    Vc_DEPRECATED("use simd_cast instead of explicit type casting to convert between "
                  "mask types") Vc_INTRINSIC
        explicit Mask(U &&rhs,
                      Common::enable_if_mask_converts_explicitly<T, U> = nullarg);
#endif

    template<typename Flags = DefaultLoadTag> Vc_INTRINSIC explicit Mask(const bool *mem, Flags f = Flags()) { load(mem, f); }

    template<typename Flags = DefaultLoadTag> Vc_INTRINSIC void load(const bool *mem, Flags = Flags());

    template<typename Flags = DefaultLoadTag> Vc_INTRINSIC void store(bool *mem, Flags = Flags()) const;

    Vc_INTRINSIC Mask &operator=(const Mask &) = default;
    Vc_INTRINSIC_L Mask &operator=(const std::array<bool, Size> &values) Vc_INTRINSIC_R;
    Vc_INTRINSIC_L operator std::array<bool, Size>() const Vc_INTRINSIC_R;

    // specializations in mask.tcc
    Vc_INTRINSIC Vc_PURE bool operator==(const Mask &rhs) const
    { return Detail::movemask(d.v()) == Detail::movemask(rhs.d.v()); }

    Vc_INTRINSIC Vc_PURE bool operator!=(const Mask &rhs) const
    { return !operator==(rhs); }

    Vc_INTRINSIC Mask operator!() const
    {
#ifdef Vc_GCC
        return ~dataI();
#else
        return Detail::andnot_(dataF(), Detail::allone<VectorTypeF>());
#endif
    }

    Vc_INTRINSIC Mask &operator&=(const Mask &rhs) { d.v() = AVX::avx_cast<VectorType>(Detail::and_(data(), rhs.data())); return *this; }
    Vc_INTRINSIC Mask &operator|=(const Mask &rhs) { d.v() = AVX::avx_cast<VectorType>(Detail::or_ (data(), rhs.data())); return *this; }
    Vc_INTRINSIC Mask &operator^=(const Mask &rhs) { d.v() = AVX::avx_cast<VectorType>(Detail::xor_(data(), rhs.data())); return *this; }

    Vc_INTRINSIC Vc_PURE Mask operator&(const Mask &rhs) const { return Detail::and_(data(), rhs.data()); }
    Vc_INTRINSIC Vc_PURE Mask operator|(const Mask &rhs) const { return Detail::or_(data(), rhs.data()); }
    Vc_INTRINSIC Vc_PURE Mask operator^(const Mask &rhs) const { return Detail::xor_(data(), rhs.data()); }

    Vc_INTRINSIC Vc_PURE Mask operator&&(const Mask &rhs) const { return Detail::and_(data(), rhs.data()); }
    Vc_INTRINSIC Vc_PURE Mask operator||(const Mask &rhs) const { return Detail::or_(data(), rhs.data()); }

    // No need for expression-template optimizations here: cmp(n)eq for floats are not
    // bitwise compares.
    Vc_INTRINSIC_L bool isNotEmpty() const Vc_INTRINSIC_R;
    Vc_INTRINSIC_L bool isEmpty() const Vc_INTRINSIC_R;
    Vc_INTRINSIC_L bool isFull() const Vc_INTRINSIC_R;
    Vc_INTRINSIC_L bool isMix() const Vc_INTRINSIC_R;

    Vc_INTRINSIC Vc_PURE int shiftMask() const { return Detail::movemask(dataI()); }
    Vc_INTRINSIC Vc_PURE int toInt() const { return Detail::mask_to_int<Size>(dataI()); }

    Vc_INTRINSIC VectorType  data () const { return d.v(); }
    Vc_INTRINSIC VectorTypeF dataF() const { return AVX::avx_cast<VectorTypeF>(d.v()); }
    Vc_INTRINSIC VectorTypeI dataI() const { return AVX::avx_cast<VectorTypeI>(d.v()); }
    Vc_INTRINSIC VectorTypeD dataD() const { return AVX::avx_cast<VectorTypeD>(d.v()); }

private:
    friend reference;
    static Vc_INTRINSIC Vc_PURE value_type get(const Mask &m, int i) noexcept
    {
        return m.toInt() & (1 << i);
    }
    template <typename U>
    static Vc_INTRINSIC void set(Mask &m, int i,
                                 U &&v) noexcept(noexcept(MaskBool(std::declval<U>())))
    {
        m.d.set(i, MaskBool(std::forward<U>(v)));
    }

public:
    /**
     * \note The returned object models the concept of a reference and as such it can
     * outlive the data it references.
     * \note To avoid lifetime issues, we strongly advise against storing any such
     * reference objects.
     */
    Vc_ALWAYS_INLINE reference operator[](size_t index) noexcept
    {
        return {*this, int(index)};
    }
    Vc_ALWAYS_INLINE Vc_PURE value_type operator[](size_t index) const noexcept
    {
        return get(*this, index);
    }

    Vc_INTRINSIC Vc_PURE int count() const { return Detail::popcnt16(toInt()); }
    Vc_INTRINSIC Vc_PURE int firstOne() const { return _bit_scan_forward(toInt()); }

    template <typename G> static Vc_INTRINSIC_L Mask generate(G &&gen) Vc_INTRINSIC_R;
    Vc_INTRINSIC_L Vc_PURE_L Mask shifted(int amount) const Vc_INTRINSIC_R Vc_PURE_R;

private:
#ifdef Vc_COMPILE_BENCHMARKS
public:
#endif
    Storage d;
};
template <typename T> constexpr size_t Mask<T, VectorAbi::Avx>::Size;
template <typename T> constexpr size_t Mask<T, VectorAbi::Avx>::MemoryAlignment;
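// Hedged usage sketch for the Mask class above (with AVX, float_m has 8 entries;
// illustration only):
//
//   Vc::AVX2::float_m m(false);  // all entries false
//   m[2] = true;                 // per-entry write through the reference proxy
//   int  n    = m.count();       // 1
//   bool some = m.isNotEmpty();  // true
//   int  bits = m.toInt();       // 0b100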

} // namespace Vc

#include "mask.tcc"

#endif // VC_AVX_MASK_H_

@ -0,0 +1,292 @@
/* This file is part of the Vc library. {{{
Copyright © 2011-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

namespace Vc_VERSIONED_NAMESPACE
{
// store {{{1
template <typename T>
template <typename Flags>
Vc_INTRINSIC void Mask<T, VectorAbi::Avx>::store(bool *mem, Flags f) const
{
    Detail::mask_store<Size>(dataI(), mem, f);
}

// load {{{1
template <typename T>
template <typename Flags>
Vc_INTRINSIC void Mask<T, VectorAbi::Avx>::load(const bool *mem, Flags f)
{
    d.v() = AVX::avx_cast<VectorType>(Detail::mask_load<VectorTypeF, Size>(mem, f));
}

// operator[] {{{1
#ifdef Vc_IMPL_AVX2
template <>
Vc_INTRINSIC Vc_PURE bool AVX2::Mask<int16_t>::get(const AVX2::Mask<int16_t> &m,
                                                   int index) noexcept
{
    return m.shiftMask() & (1 << 2 * index);
}
template <>
Vc_INTRINSIC Vc_PURE bool AVX2::Mask<uint16_t>::get(const AVX2::Mask<uint16_t> &m,
                                                    int index) noexcept
{
    return m.shiftMask() & (1 << 2 * index);
}
#endif
// operator== {{{1
template <> Vc_INTRINSIC Vc_PURE bool AVX2::double_m::operator==(const AVX2::double_m &rhs) const
{ return Detail::movemask(dataD()) == Detail::movemask(rhs.dataD()); }
#ifdef Vc_IMPL_AVX2
template <> Vc_INTRINSIC Vc_PURE bool AVX2::short_m::operator==(const AVX2::short_m &rhs) const
{ return Detail::movemask(dataI()) == Detail::movemask(rhs.dataI()); }
template <> Vc_INTRINSIC Vc_PURE bool AVX2::ushort_m::operator==(const AVX2::ushort_m &rhs) const
{ return Detail::movemask(dataI()) == Detail::movemask(rhs.dataI()); }
#endif

// isFull, isNotEmpty, isEmpty, isMix specializations {{{1
template <typename T> Vc_INTRINSIC bool Mask<T, VectorAbi::Avx>::isFull() const {
    if (sizeof(T) == 8) {
        return 0 != Detail::testc(dataD(), Detail::allone<VectorTypeD>());
    } else if (sizeof(T) == 4) {
        return 0 != Detail::testc(dataF(), Detail::allone<VectorTypeF>());
    } else {
        return 0 != Detail::testc(dataI(), Detail::allone<VectorTypeI>());
    }
}

template <typename T> Vc_INTRINSIC bool Mask<T, VectorAbi::Avx>::isNotEmpty() const {
    if (sizeof(T) == 8) {
        return 0 == Detail::testz(dataD(), dataD());
    } else if (sizeof(T) == 4) {
        return 0 == Detail::testz(dataF(), dataF());
    } else {
        return 0 == Detail::testz(dataI(), dataI());
    }
}

template <typename T> Vc_INTRINSIC bool Mask<T, VectorAbi::Avx>::isEmpty() const {
    if (sizeof(T) == 8) {
        return 0 != Detail::testz(dataD(), dataD());
    } else if (sizeof(T) == 4) {
        return 0 != Detail::testz(dataF(), dataF());
    } else {
        return 0 != Detail::testz(dataI(), dataI());
    }
}

template <typename T> Vc_INTRINSIC bool Mask<T, VectorAbi::Avx>::isMix() const {
    if (sizeof(T) == 8) {
        return 0 != Detail::testnzc(dataD(), Detail::allone<VectorTypeD>());
    } else if (sizeof(T) == 4) {
        return 0 != Detail::testnzc(dataF(), Detail::allone<VectorTypeF>());
    } else {
        return 0 != Detail::testnzc(dataI(), Detail::allone<VectorTypeI>());
    }
}

// generate {{{1
template <typename M, typename G>
Vc_INTRINSIC M generate_impl(G &&gen, std::integral_constant<int, 4 + 32>)
{
    return _mm256_setr_epi64x(
        gen(0) ? 0xffffffffffffffffull : 0, gen(1) ? 0xffffffffffffffffull : 0,
        gen(2) ? 0xffffffffffffffffull : 0, gen(3) ? 0xffffffffffffffffull : 0);
}
template <typename M, typename G>
Vc_INTRINSIC M generate_impl(G &&gen, std::integral_constant<int, 8 + 32>)
{
    return _mm256_setr_epi32(gen(0) ? 0xfffffffful : 0, gen(1) ? 0xfffffffful : 0,
                             gen(2) ? 0xfffffffful : 0, gen(3) ? 0xfffffffful : 0,
                             gen(4) ? 0xfffffffful : 0, gen(5) ? 0xfffffffful : 0,
                             gen(6) ? 0xfffffffful : 0, gen(7) ? 0xfffffffful : 0);
}
template <typename M, typename G>
Vc_INTRINSIC M generate_impl(G &&gen, std::integral_constant<int, 16 + 32>)
{
    return _mm256_setr_epi16(gen(0) ? 0xfffful : 0, gen(1) ? 0xfffful : 0,
                             gen(2) ? 0xfffful : 0, gen(3) ? 0xfffful : 0,
                             gen(4) ? 0xfffful : 0, gen(5) ? 0xfffful : 0,
                             gen(6) ? 0xfffful : 0, gen(7) ? 0xfffful : 0,
                             gen(8) ? 0xfffful : 0, gen(9) ? 0xfffful : 0,
                             gen(10) ? 0xfffful : 0, gen(11) ? 0xfffful : 0,
                             gen(12) ? 0xfffful : 0, gen(13) ? 0xfffful : 0,
                             gen(14) ? 0xfffful : 0, gen(15) ? 0xfffful : 0);
}
template <typename T>
template <typename G>
Vc_INTRINSIC AVX2::Mask<T> Mask<T, VectorAbi::Avx>::generate(G &&gen)
{
    return generate_impl<AVX2::Mask<T>>(std::forward<G>(gen),
                                        std::integral_constant<int, Size + sizeof(Storage)>());
}
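// generate dispatches on entry count plus sizeof(Storage) (e.g. 8 + 32 for an AVX
// float mask) to the matching _mm256_setr_* overload above. Hedged sketch:
//
//   auto even = Vc::AVX2::float_m::generate([](int i) { return i % 2 == 0; });
//   // even == [true, false, true, false, true, false, true, false]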
// shifted {{{1
template <typename T> Vc_INTRINSIC Vc_PURE AVX2::Mask<T> Mask<T, VectorAbi::Avx>::shifted(int amount) const
{
    switch (amount * int(sizeof(VectorEntryType))) {
    case   0: return *this;
    case   1: return Detail::shifted<  1>(dataI());
    case   2: return Detail::shifted<  2>(dataI());
    case   3: return Detail::shifted<  3>(dataI());
    case   4: return Detail::shifted<  4>(dataI());
    case   5: return Detail::shifted<  5>(dataI());
    case   6: return Detail::shifted<  6>(dataI());
    case   7: return Detail::shifted<  7>(dataI());
    case   8: return Detail::shifted<  8>(dataI());
    case   9: return Detail::shifted<  9>(dataI());
    case  10: return Detail::shifted< 10>(dataI());
    case  11: return Detail::shifted< 11>(dataI());
    case  12: return Detail::shifted< 12>(dataI());
    case  13: return Detail::shifted< 13>(dataI());
    case  14: return Detail::shifted< 14>(dataI());
    case  15: return Detail::shifted< 15>(dataI());
    case  16: return Detail::shifted< 16>(dataI());
    case  17: return Detail::shifted< 17>(dataI());
    case  18: return Detail::shifted< 18>(dataI());
    case  19: return Detail::shifted< 19>(dataI());
    case  20: return Detail::shifted< 20>(dataI());
    case  21: return Detail::shifted< 21>(dataI());
    case  22: return Detail::shifted< 22>(dataI());
    case  23: return Detail::shifted< 23>(dataI());
    case  24: return Detail::shifted< 24>(dataI());
    case  25: return Detail::shifted< 25>(dataI());
    case  26: return Detail::shifted< 26>(dataI());
    case  27: return Detail::shifted< 27>(dataI());
    case  28: return Detail::shifted< 28>(dataI());
    case  29: return Detail::shifted< 29>(dataI());
    case  30: return Detail::shifted< 30>(dataI());
    case  31: return Detail::shifted< 31>(dataI());
    case  -1: return Detail::shifted< -1>(dataI());
    case  -2: return Detail::shifted< -2>(dataI());
    case  -3: return Detail::shifted< -3>(dataI());
    case  -4: return Detail::shifted< -4>(dataI());
    case  -5: return Detail::shifted< -5>(dataI());
    case  -6: return Detail::shifted< -6>(dataI());
    case  -7: return Detail::shifted< -7>(dataI());
    case  -8: return Detail::shifted< -8>(dataI());
    case  -9: return Detail::shifted< -9>(dataI());
    case -10: return Detail::shifted<-10>(dataI());
    case -11: return Detail::shifted<-11>(dataI());
    case -12: return Detail::shifted<-12>(dataI());
    case -13: return Detail::shifted<-13>(dataI());
    case -14: return Detail::shifted<-14>(dataI());
    case -15: return Detail::shifted<-15>(dataI());
    case -16: return Detail::shifted<-16>(dataI());
    case -17: return Detail::shifted<-17>(dataI());
    case -18: return Detail::shifted<-18>(dataI());
    case -19: return Detail::shifted<-19>(dataI());
    case -20: return Detail::shifted<-20>(dataI());
    case -21: return Detail::shifted<-21>(dataI());
    case -22: return Detail::shifted<-22>(dataI());
    case -23: return Detail::shifted<-23>(dataI());
    case -24: return Detail::shifted<-24>(dataI());
    case -25: return Detail::shifted<-25>(dataI());
    case -26: return Detail::shifted<-26>(dataI());
    case -27: return Detail::shifted<-27>(dataI());
    case -28: return Detail::shifted<-28>(dataI());
    case -29: return Detail::shifted<-29>(dataI());
    case -30: return Detail::shifted<-30>(dataI());
    case -31: return Detail::shifted<-31>(dataI());
    }
    return Zero();
}
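// shifted moves mask entries towards index 0 for positive amounts (zeros are
// shifted in at the top); the switch converts the entry count into the byte shift
// Detail::shifted expects. Hedged sketch (illustration only):
//
//   Vc::AVX2::float_m m(true);
//   auto s = m.shifted(2);  // presumably entries 0..5 true, 6 and 7 false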
// }}}1

/*
template<> Vc_INTRINSIC AVX2::Mask< 4, 32> &AVX2::Mask< 4, 32>::operator=(const std::array<bool, 4> &values) {
    static_assert(sizeof(bool) == 1, "Vc expects bool to have a sizeof 1 Byte");
    unsigned int x = *reinterpret_cast<const unsigned int *>(values.data());
    x *= 0xffu;
    __m128i y = _mm_cvtsi32_si128(x); //  4 Bytes
    y = _mm_unpacklo_epi8(y, y);      //  8 Bytes
    y = _mm_unpacklo_epi16(y, y);     // 16 Bytes
    d.v() = AVX::avx_cast<__m256>(AVX::concat(_mm_unpacklo_epi32(y, y), _mm_unpackhi_epi32(y, y)));
    return *this;
}
template<> Vc_INTRINSIC AVX2::Mask< 8, 32> &AVX2::Mask< 8, 32>::operator=(const std::array<bool, 8> &values) {
    static_assert(sizeof(bool) == 1, "Vc expects bool to have a sizeof 1 Byte");
    unsigned long long x = *reinterpret_cast<const unsigned long long *>(values.data());
    x *= 0xffull;
    __m128i y = _mm_cvtsi64_si128(x); //  8 Bytes
    y = _mm_unpacklo_epi8(y, y);      // 16 Bytes
    d.v() = AVX::avx_cast<__m256>(AVX::concat(_mm_unpacklo_epi16(y, y), _mm_unpackhi_epi16(y, y)));
    return *this;
}
template<> Vc_INTRINSIC AVX2::Mask< 8, 16> &AVX2::Mask< 8, 16>::operator=(const std::array<bool, 8> &values) {
    static_assert(sizeof(bool) == 1, "Vc expects bool to have a sizeof 1 Byte");
    unsigned long long x = *reinterpret_cast<const unsigned long long *>(values.data());
    x *= 0xffull;
    __m128i y = _mm_cvtsi64_si128(x); // 8 Bytes
    d.v() = AVX::avx_cast<__m128>(_mm_unpacklo_epi8(y, y));
    return *this;
}
template<> Vc_INTRINSIC AVX2::Mask<16, 16> &AVX2::Mask<16, 16>::operator=(const std::array<bool, 16> &values) {
    static_assert(sizeof(bool) == 1, "Vc expects bool to have a sizeof 1 Byte");
    __m128i x = _mm_loadu_si128(reinterpret_cast<const __m128i *>(values.data()));
    d.v() = _mm_andnot_ps(AVX::_mm_setallone_ps(), AVX::avx_cast<__m128>(_mm_sub_epi8(x, _mm_set1_epi8(1))));
    return *this;
}

template<> Vc_INTRINSIC AVX2::Mask< 4, 32>::operator std::array<bool, 4>() const {
    static_assert(sizeof(bool) == 1, "Vc expects bool to have a sizeof 1 Byte");
    __m128i x = _mm_packs_epi32(AVX::lo128(dataI()), AVX::hi128(dataI())); // 64bit -> 32bit
    x = _mm_packs_epi32(x, x); // 32bit -> 16bit
    x = _mm_srli_epi16(x, 15);
    x = _mm_packs_epi16(x, x); // 16bit -> 8bit
    std::array<bool, 4> r;
    asm volatile("vmovd %1,%0" : "=m"(*r.data()) : "x"(x));
    return r;
}
template<> Vc_INTRINSIC AVX2::Mask< 8, 32>::operator std::array<bool, 8>() const {
    static_assert(sizeof(bool) == 1, "Vc expects bool to have a sizeof 1 Byte");
    __m128i x = _mm_packs_epi32(AVX::lo128(dataI()), AVX::hi128(dataI())); // 32bit -> 16bit
    x = _mm_srli_epi16(x, 15);
    x = _mm_packs_epi16(x, x); // 16bit -> 8bit
    std::array<bool, 8> r;
    asm volatile("vmovq %1,%0" : "=m"(*r.data()) : "x"(x));
    return r;
}
template<> Vc_INTRINSIC AVX2::Mask< 8, 16>::operator std::array<bool, 8>() const {
    static_assert(sizeof(bool) == 1, "Vc expects bool to have a sizeof 1 Byte");
    __m128i x = _mm_srli_epi16(dataI(), 15);
    x = _mm_packs_epi16(x, x); // 16bit -> 8bit
    std::array<bool, 8> r;
    asm volatile("vmovq %1,%0" : "=m"(*r.data()) : "x"(x));
    return r;
}
template<> Vc_INTRINSIC AVX2::Mask<16, 16>::operator std::array<bool, 16>() const {
    static_assert(sizeof(bool) == 1, "Vc expects bool to have a sizeof 1 Byte");
    __m128 x = _mm_and_ps(d.v(), AVX::avx_cast<__m128>(_mm_set1_epi32(0x01010101)));
    std::array<bool, 16> r;
    asm volatile("vmovups %1,%0" : "=m"(*r.data()) : "x"(x));
    return r;
}
*/

}

// vim: foldmethod=marker

@ -0,0 +1,321 @@
/* This file is part of the Vc library. {{{
Copyright © 2009-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_AVX_MATH_H_
#define VC_AVX_MATH_H_

#include "const.h"
#include "limits.h"
#include "macros.h"

namespace Vc_VERSIONED_NAMESPACE
{
// min & max {{{1
#ifdef Vc_IMPL_AVX2
Vc_ALWAYS_INLINE AVX2::int_v    min(const AVX2::int_v    &x, const AVX2::int_v    &y) { return _mm256_min_epi32(x.data(), y.data()); }
Vc_ALWAYS_INLINE AVX2::uint_v   min(const AVX2::uint_v   &x, const AVX2::uint_v   &y) { return _mm256_min_epu32(x.data(), y.data()); }
Vc_ALWAYS_INLINE AVX2::short_v  min(const AVX2::short_v  &x, const AVX2::short_v  &y) { return _mm256_min_epi16(x.data(), y.data()); }
Vc_ALWAYS_INLINE AVX2::ushort_v min(const AVX2::ushort_v &x, const AVX2::ushort_v &y) { return _mm256_min_epu16(x.data(), y.data()); }
Vc_ALWAYS_INLINE AVX2::int_v    max(const AVX2::int_v    &x, const AVX2::int_v    &y) { return _mm256_max_epi32(x.data(), y.data()); }
Vc_ALWAYS_INLINE AVX2::uint_v   max(const AVX2::uint_v   &x, const AVX2::uint_v   &y) { return _mm256_max_epu32(x.data(), y.data()); }
Vc_ALWAYS_INLINE AVX2::short_v  max(const AVX2::short_v  &x, const AVX2::short_v  &y) { return _mm256_max_epi16(x.data(), y.data()); }
Vc_ALWAYS_INLINE AVX2::ushort_v max(const AVX2::ushort_v &x, const AVX2::ushort_v &y) { return _mm256_max_epu16(x.data(), y.data()); }
#endif
Vc_ALWAYS_INLINE AVX2::float_v  min(const AVX2::float_v  &x, const AVX2::float_v  &y) { return _mm256_min_ps(x.data(), y.data()); }
Vc_ALWAYS_INLINE AVX2::double_v min(const AVX2::double_v &x, const AVX2::double_v &y) { return _mm256_min_pd(x.data(), y.data()); }
Vc_ALWAYS_INLINE AVX2::float_v  max(const AVX2::float_v  &x, const AVX2::float_v  &y) { return _mm256_max_ps(x.data(), y.data()); }
Vc_ALWAYS_INLINE AVX2::double_v max(const AVX2::double_v &x, const AVX2::double_v &y) { return _mm256_max_pd(x.data(), y.data()); }

// sqrt {{{1
template <typename T>
Vc_ALWAYS_INLINE Vc_PURE AVX2::Vector<T> sqrt(const AVX2::Vector<T> &x)
{
    return AVX::VectorHelper<T>::sqrt(x.data());
}

// rsqrt {{{1
template <typename T>
Vc_ALWAYS_INLINE Vc_PURE AVX2::Vector<T> rsqrt(const AVX2::Vector<T> &x)
{
    return AVX::VectorHelper<T>::rsqrt(x.data());
}

// reciprocal {{{1
template <typename T>
Vc_ALWAYS_INLINE Vc_PURE AVX2::Vector<T> reciprocal(const AVX2::Vector<T> &x)
{
    return AVX::VectorHelper<T>::reciprocal(x.data());
}

// round {{{1
template <typename T>
Vc_ALWAYS_INLINE Vc_PURE AVX2::Vector<T> round(const AVX2::Vector<T> &x)
{
    return AVX::VectorHelper<T>::round(x.data());
}

// abs {{{1
Vc_INTRINSIC Vc_CONST AVX2::double_v abs(AVX2::double_v x)
{
    return Detail::and_(x.data(), AVX::setabsmask_pd());
}
Vc_INTRINSIC Vc_CONST AVX2::float_v abs(AVX2::float_v x)
{
    return Detail::and_(x.data(), AVX::setabsmask_ps());
}
#ifdef Vc_IMPL_AVX2
Vc_INTRINSIC Vc_CONST AVX2::int_v abs(AVX2::int_v x)
{
    return _mm256_abs_epi32(x.data());
}
Vc_INTRINSIC Vc_CONST AVX2::short_v abs(AVX2::short_v x)
{
    return _mm256_abs_epi16(x.data());
}
#endif

// isfinite {{{1
Vc_ALWAYS_INLINE Vc_PURE AVX2::double_m isfinite(const AVX2::double_v &x)
{
    return AVX::cmpord_pd(x.data(), _mm256_mul_pd(Detail::zero<__m256d>(), x.data()));
}

Vc_ALWAYS_INLINE Vc_PURE AVX2::float_m isfinite(const AVX2::float_v &x)
{
    return AVX::cmpord_ps(x.data(), _mm256_mul_ps(Detail::zero<__m256>(), x.data()));
}

// isinf {{{1
Vc_ALWAYS_INLINE Vc_PURE AVX2::double_m isinf(const AVX2::double_v &x)
{
    return _mm256_castsi256_pd(AVX::cmpeq_epi64(
        _mm256_castpd_si256(abs(x).data()),
        _mm256_castpd_si256(Detail::avx_broadcast(AVX::c_log<double>::d(1)))));
}

Vc_ALWAYS_INLINE Vc_PURE AVX2::float_m isinf(const AVX2::float_v &x)
{
    return _mm256_castsi256_ps(
        AVX::cmpeq_epi32(_mm256_castps_si256(abs(x).data()),
                         _mm256_castps_si256(Detail::avx_broadcast(AVX::c_log<float>::d(1)))));
}

// isnan {{{1
Vc_ALWAYS_INLINE Vc_PURE AVX2::double_m isnan(const AVX2::double_v &x)
{
    return AVX::cmpunord_pd(x.data(), x.data());
}

Vc_ALWAYS_INLINE Vc_PURE AVX2::float_m isnan(const AVX2::float_v &x)
{
    return AVX::cmpunord_ps(x.data(), x.data());
}

// copysign {{{1
Vc_INTRINSIC Vc_CONST AVX2::float_v copysign(AVX2::float_v mag, AVX2::float_v sign)
{
    return _mm256_or_ps(_mm256_and_ps(sign.data(), AVX::setsignmask_ps()),
                        _mm256_and_ps(mag.data(), AVX::setabsmask_ps()));
}
Vc_INTRINSIC Vc_CONST AVX2::double_v copysign(AVX2::double_v::AsArg mag,
                                              AVX2::double_v::AsArg sign)
{
    return _mm256_or_pd(_mm256_and_pd(sign.data(), AVX::setsignmask_pd()),
                        _mm256_and_pd(mag.data(), AVX::setabsmask_pd()));
}

//}}}1
// frexp {{{1
/**
 * Splits \p v into exponent and mantissa; the sign is kept with the mantissa.
 *
 * The return value will be in the range [0.5, 1.0[
 * The \p e value will be an integer defining the power-of-two exponent
 */
inline AVX2::double_v frexp(AVX2::double_v::AsArg v, SimdArray<int, 4> *e)
{
    const __m256d exponentBits = AVX::Const<double>::exponentMask().dataD();
    const __m256d exponentPart = _mm256_and_pd(v.data(), exponentBits);
    auto lo = AVX::avx_cast<__m128i>(AVX::lo128(exponentPart));
    auto hi = AVX::avx_cast<__m128i>(AVX::hi128(exponentPart));
    lo = _mm_sub_epi32(_mm_srli_epi64(lo, 52), _mm_set1_epi64x(0x3fe));
    hi = _mm_sub_epi32(_mm_srli_epi64(hi, 52), _mm_set1_epi64x(0x3fe));
    SSE::int_v exponent = Mem::shuffle<X0, X2, Y0, Y2>(lo, hi);
    const __m256d exponentMaximized = _mm256_or_pd(v.data(), exponentBits);
    AVX2::double_v ret =
        _mm256_and_pd(exponentMaximized,
                      _mm256_broadcast_sd(reinterpret_cast<const double *>(&AVX::c_general::frexpMask)));
    const double_m zeroMask = v == AVX2::double_v::Zero();
    ret(isnan(v) || !isfinite(v) || zeroMask) = v;
    exponent.setZero(simd_cast<SSE::int_m>(zeroMask));
    internal_data(*e) = exponent;
    return ret;
}
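// Hedged round-trip sketch for frexp/ldexp (v == ldexp(m, e) with |m| in [0.5, 1)):
//
//   Vc::AVX2::double_v v(12.);
//   Vc::SimdArray<int, 4> e;
//   auto m    = Vc::frexp(v, &e);  // m == 0.75, e == 4, since 12 == 0.75 * 2^4
//   auto back = Vc::ldexp(m, e);   // back == 12.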

#ifdef Vc_IMPL_AVX2
inline SimdArray<double, 8> frexp(const SimdArray<double, 8> &v, SimdArray<int, 8> *e)
{
    const __m256d exponentBits = AVX::Const<double>::exponentMask().dataD();
    const __m256d w[2] = {internal_data(internal_data0(v)).data(),
                          internal_data(internal_data1(v)).data()};
    const __m256i exponentPart[2] = {
        _mm256_castpd_si256(_mm256_and_pd(w[0], exponentBits)),
        _mm256_castpd_si256(_mm256_and_pd(w[1], exponentBits))};
    const __m256i lo = _mm256_sub_epi32(_mm256_srli_epi64(exponentPart[0], 52),
                                        _mm256_set1_epi32(0x3fe));  // 0.1. 2.3.
    const __m256i hi = _mm256_sub_epi32(_mm256_srli_epi64(exponentPart[1], 52),
                                        _mm256_set1_epi32(0x3fe));  // 4.5. 6.7.
    const __m256i a = _mm256_unpacklo_epi32(lo, hi);                // 04.. 26..
    const __m256i b = _mm256_unpackhi_epi32(lo, hi);                // 15.. 37..
    const __m256i tmp = _mm256_unpacklo_epi32(a, b);                // 0145 2367
    const __m256i exponent =
        AVX::concat(_mm_unpacklo_epi64(AVX::lo128(tmp), AVX::hi128(tmp)),
                    _mm_unpackhi_epi64(AVX::lo128(tmp), AVX::hi128(tmp)));  // 0123 4567
    const __m256d exponentMaximized[2] = {_mm256_or_pd(w[0], exponentBits),
                                          _mm256_or_pd(w[1], exponentBits)};
    const auto frexpMask =
        _mm256_broadcast_sd(reinterpret_cast<const double *>(&AVX::c_general::frexpMask));
    fixed_size_simd<double, 8> ret = {
        fixed_size_simd<double, 4>(
            AVX::double_v(_mm256_and_pd(exponentMaximized[0], frexpMask))),
        fixed_size_simd<double, 4>(
            AVX::double_v(_mm256_and_pd(exponentMaximized[1], frexpMask)))};
    const auto zeroMask = v == v.Zero();
    ret(isnan(v) || !isfinite(v) || zeroMask) = v;
    internal_data(*e) =
        Detail::andnot_(simd_cast<AVX2::int_m>(zeroMask).dataI(), exponent);
    return ret;
}
#endif // Vc_IMPL_AVX2

namespace Detail
{
Vc_INTRINSIC AVX2::float_v::IndexType extractExponent(__m256 e)
{
    SimdArray<uint, float_v::Size> exponentPart;
    const auto ee = AVX::avx_cast<__m256i>(e);
#ifdef Vc_IMPL_AVX2
    exponentPart = AVX2::uint_v(ee);
#else
    internal_data(internal_data0(exponentPart)) = AVX::lo128(ee);
    internal_data(internal_data1(exponentPart)) = AVX::hi128(ee);
#endif
    return (exponentPart >> 23) - 0x7e;
}
} // namespace Detail
inline AVX2::float_v frexp(AVX2::float_v::AsArg v, SimdArray<int, 8> *e)
{
    using namespace Detail;
    using namespace AVX2;
    const __m256 exponentBits = Const<float>::exponentMask().data();
    *e = extractExponent(and_(v.data(), exponentBits));
    const __m256 exponentMaximized = or_(v.data(), exponentBits);
    AVX2::float_v ret = _mm256_and_ps(exponentMaximized, avx_cast<__m256>(set1_epi32(0xbf7fffffu)));
    ret(isnan(v) || !isfinite(v) || v == AVX2::float_v::Zero()) = v;
    e->setZero(simd_cast<decltype(*e == *e)>(v == AVX2::float_v::Zero()));
    return ret;
}

// ldexp {{{1
/* -> x * 2^e
 * x == NaN    -> NaN
 * x == (-)inf -> (-)inf
 */
inline AVX2::double_v ldexp(AVX2::double_v::AsArg v, const SimdArray<int, 4> &_e)
{
    SSE::int_v e = internal_data(_e);
    e.setZero(simd_cast<SSE::int_m>(v == AVX2::double_v::Zero()));
    const __m256i exponentBits =
        AVX::concat(_mm_slli_epi64(_mm_unpacklo_epi32(e.data(), e.data()), 52),
                    _mm_slli_epi64(_mm_unpackhi_epi32(e.data(), e.data()), 52));
    return AVX::avx_cast<__m256d>(
        AVX::add_epi64(AVX::avx_cast<__m256i>(v.data()), exponentBits));
}
inline AVX2::float_v ldexp(AVX2::float_v::AsArg v, SimdArray<int, 8> e)
{
    e.setZero(simd_cast<decltype(e == e)>(v == AVX2::float_v::Zero()));
    e <<= 23;
#ifdef Vc_IMPL_AVX2
    return {AVX::avx_cast<__m256>(
        AVX::concat(_mm_add_epi32(AVX::avx_cast<__m128i>(AVX::lo128(v.data())),
                                  AVX::lo128(internal_data(e).data())),
                    _mm_add_epi32(AVX::avx_cast<__m128i>(AVX::hi128(v.data())),
                                  AVX::hi128(internal_data(e).data()))))};
#else
    return {AVX::avx_cast<__m256>(
        AVX::concat(_mm_add_epi32(AVX::avx_cast<__m128i>(AVX::lo128(v.data())),
                                  internal_data(internal_data0(e)).data()),
                    _mm_add_epi32(AVX::avx_cast<__m128i>(AVX::hi128(v.data())),
                                  internal_data(internal_data1(e)).data())))};
#endif
}

// trunc {{{1
Vc_ALWAYS_INLINE AVX2::float_v trunc(AVX2::float_v::AsArg v)
{
    return _mm256_round_ps(v.data(), 0x3);
}
Vc_ALWAYS_INLINE AVX2::double_v trunc(AVX2::double_v::AsArg v)
{
    return _mm256_round_pd(v.data(), 0x3);
}

// floor {{{1
Vc_ALWAYS_INLINE AVX2::float_v floor(AVX2::float_v::AsArg v)
{
    return _mm256_floor_ps(v.data());
}
Vc_ALWAYS_INLINE AVX2::double_v floor(AVX2::double_v::AsArg v)
{
    return _mm256_floor_pd(v.data());
}

// ceil {{{1
Vc_ALWAYS_INLINE AVX2::float_v ceil(AVX2::float_v::AsArg v)
{
    return _mm256_ceil_ps(v.data());
}
Vc_ALWAYS_INLINE AVX2::double_v ceil(AVX2::double_v::AsArg v)
{
    return _mm256_ceil_pd(v.data());
}

// fma {{{1
template <typename T>
Vc_ALWAYS_INLINE Vector<T, VectorAbi::Avx> fma(Vector<T, VectorAbi::Avx> a,
                                               Vector<T, VectorAbi::Avx> b,
                                               Vector<T, VectorAbi::Avx> c)
{
    return Detail::fma(a.data(), b.data(), c.data(), T());
}

// }}}1
} // namespace Vc

#endif // VC_AVX_MATH_H_

// vim: foldmethod=marker

@ -0,0 +1,308 @@
/* This file is part of the Vc library. {{{
Copyright © 2011-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_AVX_SHUFFLE_H_
#define VC_AVX_SHUFFLE_H_

#include "../sse/shuffle.h"
#include "macros.h"

namespace Vc_VERSIONED_NAMESPACE
{
namespace Detail
{
template <int... Dst> struct Permutation {};
template <uint8_t... Sel> struct Mask {};

#ifdef Vc_IMPL_AVX2
template <uint8_t Sel0, uint8_t Sel1, uint8_t Sel2, uint8_t Sel3, uint8_t Sel4,
          uint8_t Sel5, uint8_t Sel6, uint8_t Sel7, uint8_t Sel8, uint8_t Sel9,
          uint8_t Sel10, uint8_t Sel11, uint8_t Sel12, uint8_t Sel13, uint8_t Sel14,
          uint8_t Sel15>
Vc_INTRINSIC Vc_CONST __m256i
blend(__m256i a, __m256i b, Mask<Sel0, Sel1, Sel2, Sel3, Sel4, Sel5, Sel6, Sel7, Sel8,
                                 Sel9, Sel10, Sel11, Sel12, Sel13, Sel14, Sel15>)
{
    static_assert((Sel0 == 0 || Sel0 == 1) && (Sel1 == 0 || Sel1 == 1) &&
                      (Sel2 == 0 || Sel2 == 1) && (Sel3 == 0 || Sel3 == 1) &&
                      (Sel4 == 0 || Sel4 == 1) && (Sel5 == 0 || Sel5 == 1) &&
                      (Sel6 == 0 || Sel6 == 1) && (Sel7 == 0 || Sel7 == 1) &&
                      (Sel8 == 0 || Sel8 == 1) && (Sel9 == 0 || Sel9 == 1) &&
                      (Sel10 == 0 || Sel10 == 1) && (Sel11 == 0 || Sel11 == 1) &&
                      (Sel12 == 0 || Sel12 == 1) && (Sel13 == 0 || Sel13 == 1) &&
                      (Sel14 == 0 || Sel14 == 1) && (Sel15 == 0 || Sel15 == 1),
                  "Selectors must be 0 or 1 to select the value from a or b");
    constexpr uint8_t mask = static_cast<uint8_t>(
        (Sel0 << 0 ) | (Sel1 << 1 ) | (Sel2 << 2 ) | (Sel3 << 3 ) |
        (Sel4 << 4 ) | (Sel5 << 5 ) | (Sel6 << 6 ) | (Sel7 << 7 ) |
        (Sel8 << 8 ) | (Sel9 << 9 ) | (Sel10 << 10) | (Sel11 << 11) |
        (Sel12 << 12) | (Sel13 << 13) | (Sel14 << 14) | (Sel15 << 15));
    return _mm256_blend_epi16(a, b, mask);
}
#endif // Vc_IMPL_AVX2
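// Detail::blend selects per 16-bit lane: Sel == 0 keeps the lane of a, Sel == 1
// takes it from b; the selector pack is folded into the vpblendw immediate (the
// same 8-bit immediate applies to both 128-bit halves). Hedged sketch:
//
//   __m256i a = _mm256_set1_epi16(1);
//   __m256i b = _mm256_set1_epi16(2);
//   __m256i r = blend(a, b, Mask<0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1>());  // 1,2,1,2,...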
} // namespace Detail
namespace Mem
{
#ifdef Vc_IMPL_AVX2
template<VecPos Dst0, VecPos Dst1, VecPos Dst2, VecPos Dst3> static Vc_ALWAYS_INLINE __m256i Vc_CONST permuteLo(__m256i x) {
    static_assert(Dst0 >= X0 && Dst1 >= X0 && Dst2 >= X0 && Dst3 >= X0, "Incorrect_Range");
    static_assert(Dst0 <= X3 && Dst1 <= X3 && Dst2 <= X3 && Dst3 <= X3, "Incorrect_Range");
    return _mm256_shufflelo_epi16(x, Dst0 + Dst1 * 4 + Dst2 * 16 + Dst3 * 64);
}

template<VecPos Dst0, VecPos Dst1, VecPos Dst2, VecPos Dst3> static Vc_ALWAYS_INLINE __m256i Vc_CONST permuteHi(__m256i x) {
    static_assert(Dst0 >= X4 && Dst1 >= X4 && Dst2 >= X4 && Dst3 >= X4, "Incorrect_Range");
    static_assert(Dst0 <= X7 && Dst1 <= X7 && Dst2 <= X7 && Dst3 <= X7, "Incorrect_Range");
    return _mm256_shufflehi_epi16(x, (Dst0 - X4) + (Dst1 - X4) * 4 + (Dst2 - X4) * 16 + (Dst3 - X4) * 64);
}
#endif // Vc_IMPL_AVX2

template<VecPos L, VecPos H> static Vc_ALWAYS_INLINE __m256 Vc_CONST permute128(__m256 x) {
    static_assert((L >= X0 && L <= X1) || L == Const0, "Incorrect_Range");
    static_assert((H >= X0 && H <= X1) || H == Const0, "Incorrect_Range");
    return _mm256_permute2f128_ps(
        x, x, (L == Const0 ? 0x8 : L) + (H == Const0 ? 0x80 : H * (1 << 4)));
}
template<VecPos L, VecPos H> static Vc_ALWAYS_INLINE __m256d Vc_CONST permute128(__m256d x) {
    static_assert((L >= X0 && L <= X1) || L == Const0, "Incorrect_Range");
    static_assert((H >= X0 && H <= X1) || H == Const0, "Incorrect_Range");
    return _mm256_permute2f128_pd(
        x, x, (L == Const0 ? 0x8 : L) + (H == Const0 ? 0x80 : H * (1 << 4)));
}
template<VecPos L, VecPos H> static Vc_ALWAYS_INLINE __m256i Vc_CONST permute128(__m256i x) {
    static_assert((L >= X0 && L <= X1) || L == Const0, "Incorrect_Range");
    static_assert((H >= X0 && H <= X1) || H == Const0, "Incorrect_Range");
#ifdef Vc_IMPL_AVX2
    return _mm256_permute2x128_si256(
        x, x, (L == Const0 ? 0x8 : L) + (H == Const0 ? 0x80 : H * (1 << 4)));
#else
    return _mm256_permute2f128_si256(
        x, x, (L == Const0 ? 0x8 : L) + (H == Const0 ? 0x80 : H * (1 << 4)));
#endif
}
template<VecPos L, VecPos H> static Vc_ALWAYS_INLINE __m256 Vc_CONST shuffle128(__m256 x, __m256 y) {
    static_assert(L >= X0 && H >= X0, "Incorrect_Range");
    static_assert(L <= Y1 && H <= Y1, "Incorrect_Range");
    return _mm256_permute2f128_ps(x, y, (L < Y0 ? L : L - Y0 + 2) + (H < Y0 ? H : H - Y0 + 2) * (1 << 4));
}
template<VecPos L, VecPos H> static Vc_ALWAYS_INLINE __m256i Vc_CONST shuffle128(__m256i x, __m256i y) {
    static_assert(L >= X0 && H >= X0, "Incorrect_Range");
    static_assert(L <= Y1 && H <= Y1, "Incorrect_Range");
#ifdef Vc_IMPL_AVX2
    return _mm256_permute2x128_si256(
        x, y, (L < Y0 ? L : L - Y0 + 2) + (H < Y0 ? H : H - Y0 + 2) * (1 << 4));
#else
    return _mm256_permute2f128_si256(
        x, y, (L < Y0 ? L : L - Y0 + 2) + (H < Y0 ? H : H - Y0 + 2) * (1 << 4));
#endif
}
template<VecPos L, VecPos H> static Vc_ALWAYS_INLINE __m256d Vc_CONST shuffle128(__m256d x, __m256d y) {
    static_assert(L >= X0 && H >= X0, "Incorrect_Range");
    static_assert(L <= Y1 && H <= Y1, "Incorrect_Range");
    return _mm256_permute2f128_pd(x, y, (L < Y0 ? L : L - Y0 + 2) + (H < Y0 ? H : H - Y0 + 2) * (1 << 4));
}
|
||||
template<VecPos Dst0, VecPos Dst1, VecPos Dst2, VecPos Dst3> static Vc_ALWAYS_INLINE __m256d Vc_CONST permute(__m256d x) {
|
||||
static_assert(Dst0 >= X0 && Dst1 >= X0 && Dst2 >= X2 && Dst3 >= X2, "Incorrect_Range");
|
||||
static_assert(Dst0 <= X1 && Dst1 <= X1 && Dst2 <= X3 && Dst3 <= X3, "Incorrect_Range");
|
||||
return _mm256_permute_pd(x, Dst0 + Dst1 * 2 + (Dst2 - X2) * 4 + (Dst3 - X2) * 8);
|
||||
}
|
||||
template<VecPos Dst0, VecPos Dst1, VecPos Dst2, VecPos Dst3> static Vc_ALWAYS_INLINE __m256 Vc_CONST permute(__m256 x) {
|
||||
static_assert(Dst0 >= X0 && Dst1 >= X0 && Dst2 >= X0 && Dst3 >= X0, "Incorrect_Range");
|
||||
static_assert(Dst0 <= X3 && Dst1 <= X3 && Dst2 <= X3 && Dst3 <= X3, "Incorrect_Range");
|
||||
return _mm256_permute_ps(x, Dst0 + Dst1 * 4 + Dst2 * 16 + Dst3 * 64);
|
||||
}
|
||||
template<VecPos Dst0, VecPos Dst1, VecPos Dst2, VecPos Dst3> static Vc_ALWAYS_INLINE __m256i Vc_CONST permute(__m256i x) {
|
||||
return _mm256_castps_si256(permute<Dst0, Dst1, Dst2, Dst3>(_mm256_castsi256_ps(x)));
|
||||
}
|
||||
#ifdef Vc_IMPL_AVX2
|
||||
template<VecPos Dst0, VecPos Dst1, VecPos Dst2, VecPos Dst3> static Vc_ALWAYS_INLINE __m256i Vc_CONST permute4x64(__m256i x) {
|
||||
static_assert(Dst0 >= X0 && Dst1 >= X0 && Dst2 >= X0 && Dst3 >= X0, "Incorrect_Range");
|
||||
static_assert(Dst0 <= X3 && Dst1 <= X3 && Dst2 <= X3 && Dst3 <= X3, "Incorrect_Range");
|
||||
return _mm256_permute4x64_epi64(x, Dst0 + Dst1 * 4 + Dst2 * 16 + Dst3 * 64);
|
||||
}
|
||||
#endif // Vc_IMPL_AVX2
|
||||
template<VecPos Dst0, VecPos Dst1, VecPos Dst2, VecPos Dst3> static Vc_ALWAYS_INLINE __m256d Vc_CONST shuffle(__m256d x, __m256d y) {
|
||||
static_assert(Dst0 >= X0 && Dst1 >= Y0 && Dst2 >= X2 && Dst3 >= Y2, "Incorrect_Range");
|
||||
static_assert(Dst0 <= X1 && Dst1 <= Y1 && Dst2 <= X3 && Dst3 <= Y3, "Incorrect_Range");
|
||||
return _mm256_shuffle_pd(x, y, Dst0 + (Dst1 - Y0) * 2 + (Dst2 - X2) * 4 + (Dst3 - Y2) * 8);
|
||||
}
|
||||
template<VecPos Dst0, VecPos Dst1, VecPos Dst2, VecPos Dst3> static Vc_ALWAYS_INLINE __m256 Vc_CONST shuffle(__m256 x, __m256 y) {
|
||||
static_assert(Dst0 >= X0 && Dst1 >= X0 && Dst2 >= Y0 && Dst3 >= Y0, "Incorrect_Range");
|
||||
static_assert(Dst0 <= X3 && Dst1 <= X3 && Dst2 <= Y3 && Dst3 <= Y3, "Incorrect_Range");
|
||||
return _mm256_shuffle_ps(x, y, Dst0 + Dst1 * 4 + (Dst2 - Y0) * 16 + (Dst3 - Y0) * 64);
|
||||
}
|
||||
template<VecPos Dst0, VecPos Dst1, VecPos Dst2, VecPos Dst3, VecPos Dst4, VecPos Dst5, VecPos Dst6, VecPos Dst7>
|
||||
static Vc_ALWAYS_INLINE __m256 Vc_CONST blend(__m256 x, __m256 y) {
|
||||
static_assert(Dst0 == X0 || Dst0 == Y0, "Incorrect_Range");
|
||||
static_assert(Dst1 == X1 || Dst1 == Y1, "Incorrect_Range");
|
||||
static_assert(Dst2 == X2 || Dst2 == Y2, "Incorrect_Range");
|
||||
static_assert(Dst3 == X3 || Dst3 == Y3, "Incorrect_Range");
|
||||
static_assert(Dst4 == X4 || Dst4 == Y4, "Incorrect_Range");
|
||||
static_assert(Dst5 == X5 || Dst5 == Y5, "Incorrect_Range");
|
||||
static_assert(Dst6 == X6 || Dst6 == Y6, "Incorrect_Range");
|
||||
static_assert(Dst7 == X7 || Dst7 == Y7, "Incorrect_Range");
|
||||
return _mm256_blend_ps(x, y,
|
||||
(Dst0 / Y0) * 1 + (Dst1 / Y1) * 2 +
|
||||
(Dst2 / Y2) * 4 + (Dst3 / Y3) * 8 +
|
||||
(Dst4 / Y4) * 16 + (Dst5 / Y5) * 32 +
|
||||
(Dst6 / Y6) * 64 + (Dst7 / Y7) *128
|
||||
);
|
||||
}
|
||||
template<VecPos Dst0, VecPos Dst1, VecPos Dst2, VecPos Dst3, VecPos Dst4, VecPos Dst5, VecPos Dst6, VecPos Dst7>
|
||||
static Vc_ALWAYS_INLINE __m256i Vc_CONST blend(__m256i x, __m256i y) {
|
||||
return _mm256_castps_si256(blend<Dst0, Dst1, Dst2, Dst3, Dst4, Dst5, Dst6, Dst7>(_mm256_castsi256_ps(x), _mm256_castsi256_ps(y)));
|
||||
}
|
||||
template<VecPos Dst> struct ScaleForBlend { enum { Value = Dst >= X4 ? Dst - X4 + Y0 : Dst }; };
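// ScaleForBlend rewrites a selector that refers to the high 128-bit half
// (X4..X7) as the corresponding Y position, so the 4-wide blend(loIn, hiIn)
// fallback paths in permute() below can pick that lane from their second
// argument instead.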
template<VecPos Dst0, VecPos Dst1, VecPos Dst2, VecPos Dst3, VecPos Dst4, VecPos Dst5, VecPos Dst6, VecPos Dst7>
static Vc_ALWAYS_INLINE __m256 Vc_CONST permute(__m256 x) {
    static_assert(Dst0 >= X0 && Dst0 <= X7, "Incorrect_Range");
    static_assert(Dst1 >= X0 && Dst1 <= X7, "Incorrect_Range");
    static_assert(Dst2 >= X0 && Dst2 <= X7, "Incorrect_Range");
    static_assert(Dst3 >= X0 && Dst3 <= X7, "Incorrect_Range");
    static_assert(Dst4 >= X0 && Dst4 <= X7, "Incorrect_Range");
    static_assert(Dst5 >= X0 && Dst5 <= X7, "Incorrect_Range");
    static_assert(Dst6 >= X0 && Dst6 <= X7, "Incorrect_Range");
    static_assert(Dst7 >= X0 && Dst7 <= X7, "Incorrect_Range");
    if (Dst0 + X4 == Dst4 && Dst1 + X4 == Dst5 && Dst2 + X4 == Dst6 && Dst3 + X4 == Dst7) {
        return permute<Dst0, Dst1, Dst2, Dst3>(x);
    }
    const __m128 loIn = _mm256_castps256_ps128(x);
    const __m128 hiIn = _mm256_extractf128_ps(x, 1);
    __m128 lo, hi;

    if (Dst0 < X4 && Dst1 < X4 && Dst2 < X4 && Dst3 < X4) {
        lo = _mm_permute_ps(loIn, Dst0 + Dst1 * 4 + Dst2 * 16 + Dst3 * 64);
    } else if (Dst0 >= X4 && Dst1 >= X4 && Dst2 >= X4 && Dst3 >= X4) {
        lo = _mm_permute_ps(hiIn, Dst0 + Dst1 * 4 + Dst2 * 16 + Dst3 * 64);
    } else if (Dst0 < X4 && Dst1 < X4 && Dst2 >= X4 && Dst3 >= X4) {
        lo = shuffle<Dst0, Dst1, Dst2 - X4 + Y0, Dst3 - X4 + Y0>(loIn, hiIn);
    } else if (Dst0 >= X4 && Dst1 >= X4 && Dst2 < X4 && Dst3 < X4) {
        lo = shuffle<Dst0 - X4, Dst1 - X4, Dst2 + Y0, Dst3 + Y0>(hiIn, loIn);
    } else if (Dst0 == X0 && Dst1 == X4 && Dst2 == X1 && Dst3 == X5) {
        lo = _mm_unpacklo_ps(loIn, hiIn);
    } else if (Dst0 == X4 && Dst1 == X0 && Dst2 == X5 && Dst3 == X1) {
        lo = _mm_unpacklo_ps(hiIn, loIn);
    } else if (Dst0 == X2 && Dst1 == X6 && Dst2 == X3 && Dst3 == X7) {
        lo = _mm_unpackhi_ps(loIn, hiIn);
    } else if (Dst0 == X6 && Dst1 == X2 && Dst2 == X7 && Dst3 == X3) {
        lo = _mm_unpackhi_ps(hiIn, loIn);
    } else if (Dst0 % X4 == 0 && Dst1 % X4 == 1 && Dst2 % X4 == 2 && Dst3 % X4 == 3) {
        lo = blend<ScaleForBlend<Dst0>::Value, ScaleForBlend<Dst1>::Value,
                   ScaleForBlend<Dst2>::Value, ScaleForBlend<Dst3>::Value>(loIn, hiIn);
    }

    if (Dst4 >= X4 && Dst5 >= X4 && Dst6 >= X4 && Dst7 >= X4) {
        hi = _mm_permute_ps(hiIn, (Dst4 - X4) + (Dst5 - X4) * 4 + (Dst6 - X4) * 16 + (Dst7 - X4) * 64);
    } else if (Dst4 < X4 && Dst5 < X4 && Dst6 < X4 && Dst7 < X4) {
        hi = _mm_permute_ps(loIn, (Dst4 - X4) + (Dst5 - X4) * 4 + (Dst6 - X4) * 16 + (Dst7 - X4) * 64);
    } else if (Dst4 < X4 && Dst5 < X4 && Dst6 >= X4 && Dst7 >= X4) {
        hi = shuffle<Dst4, Dst5, Dst6 - X4 + Y0, Dst7 - X4 + Y0>(loIn, hiIn);
    } else if (Dst4 >= X4 && Dst5 >= X4 && Dst6 < X4 && Dst7 < X4) {
        hi = shuffle<Dst4 - X4, Dst5 - X4, Dst6 + Y0, Dst7 + Y0>(hiIn, loIn);
    } else if (Dst4 == X0 && Dst5 == X4 && Dst6 == X1 && Dst7 == X5) {
        hi = _mm_unpacklo_ps(loIn, hiIn);
    } else if (Dst4 == X4 && Dst5 == X0 && Dst6 == X5 && Dst7 == X1) {
        hi = _mm_unpacklo_ps(hiIn, loIn);
    } else if (Dst4 == X2 && Dst5 == X6 && Dst6 == X3 && Dst7 == X7) {
        hi = _mm_unpackhi_ps(loIn, hiIn);
    } else if (Dst4 == X6 && Dst5 == X2 && Dst6 == X7 && Dst7 == X3) {
        hi = _mm_unpackhi_ps(hiIn, loIn);
    } else if (Dst4 % X4 == 0 && Dst5 % X4 == 1 && Dst6 % X4 == 2 && Dst7 % X4 == 3) {
        hi = blend<ScaleForBlend<Dst4>::Value, ScaleForBlend<Dst5>::Value,
                   ScaleForBlend<Dst6>::Value, ScaleForBlend<Dst7>::Value>(loIn, hiIn);
    }

    return _mm256_insertf128_ps(_mm256_castps128_ps256(lo), hi, 1);
}
}  // namespace Mem
}  // namespace Vc

// little endian has the lo bits on the right and high bits on the left
// with vectors this becomes greatly confusing:
// Mem: abcd
// Reg: dcba
//
// The shuffles and permutes above use memory ordering. The ones below use register ordering:
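// Example (illustration only): reversing the four floats within each 128-bit
// half of a __m256 can be written as Mem::permute<X3, X2, X1, X0>(x), with
// the selectors listed in memory order, or equivalently as
// Reg::permute<X0, X1, X2, X3>(x), with the same selectors listed in
// register order; both expand to the immediate 0b00011011.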
namespace Vc_VERSIONED_NAMESPACE
{
namespace Reg
{
template<VecPos H, VecPos L> static Vc_ALWAYS_INLINE __m256 Vc_CONST permute128(__m256 x, __m256 y) {
    static_assert(L >= X0 && H >= X0, "Incorrect_Range");
    static_assert(L <= Y1 && H <= Y1, "Incorrect_Range");
    return _mm256_permute2f128_ps(x, y, (L < Y0 ? L : L - Y0 + 2) + (H < Y0 ? H : H - Y0 + 2) * (1 << 4));
}
template<VecPos H, VecPos L> static Vc_ALWAYS_INLINE __m256i Vc_CONST permute128(__m256i x, __m256i y) {
    static_assert(L >= X0 && H >= X0, "Incorrect_Range");
    static_assert(L <= Y1 && H <= Y1, "Incorrect_Range");
#ifdef Vc_IMPL_AVX2
    return _mm256_permute2x128_si256(
        x, y, (L < Y0 ? L : L - Y0 + 2) + (H < Y0 ? H : H - Y0 + 2) * (1 << 4));
#else
    return _mm256_permute2f128_si256(
        x, y, (L < Y0 ? L : L - Y0 + 2) + (H < Y0 ? H : H - Y0 + 2) * (1 << 4));
#endif
}
template<VecPos H, VecPos L> static Vc_ALWAYS_INLINE __m256d Vc_CONST permute128(__m256d x, __m256d y) {
    static_assert(L >= X0 && H >= X0, "Incorrect_Range");
    static_assert(L <= Y1 && H <= Y1, "Incorrect_Range");
    return _mm256_permute2f128_pd(x, y, (L < Y0 ? L : L - Y0 + 2) + (H < Y0 ? H : H - Y0 + 2) * (1 << 4));
}
template<VecPos Dst3, VecPos Dst2, VecPos Dst1, VecPos Dst0> static Vc_ALWAYS_INLINE __m256d Vc_CONST permute(__m256d x) {
    static_assert(Dst0 >= X0 && Dst1 >= X0 && Dst2 >= X2 && Dst3 >= X2, "Incorrect_Range");
    static_assert(Dst0 <= X1 && Dst1 <= X1 && Dst2 <= X3 && Dst3 <= X3, "Incorrect_Range");
    return _mm256_permute_pd(x, Dst0 + Dst1 * 2 + (Dst2 - X2) * 4 + (Dst3 - X2) * 8);
}
template<VecPos Dst3, VecPos Dst2, VecPos Dst1, VecPos Dst0> static Vc_ALWAYS_INLINE __m256 Vc_CONST permute(__m256 x) {
    static_assert(Dst0 >= X0 && Dst1 >= X0 && Dst2 >= X0 && Dst3 >= X0, "Incorrect_Range");
    static_assert(Dst0 <= X3 && Dst1 <= X3 && Dst2 <= X3 && Dst3 <= X3, "Incorrect_Range");
    return _mm256_permute_ps(x, Dst0 + Dst1 * 4 + Dst2 * 16 + Dst3 * 64);
}
template<VecPos Dst1, VecPos Dst0> static Vc_ALWAYS_INLINE __m128d Vc_CONST permute(__m128d x) {
    static_assert(Dst0 >= X0 && Dst1 >= X0, "Incorrect_Range");
    static_assert(Dst0 <= X1 && Dst1 <= X1, "Incorrect_Range");
    return _mm_permute_pd(x, Dst0 + Dst1 * 2);
}
template<VecPos Dst3, VecPos Dst2, VecPos Dst1, VecPos Dst0> static Vc_ALWAYS_INLINE __m128 Vc_CONST permute(__m128 x) {
    static_assert(Dst0 >= X0 && Dst1 >= X0 && Dst2 >= X0 && Dst3 >= X0, "Incorrect_Range");
    static_assert(Dst0 <= X3 && Dst1 <= X3 && Dst2 <= X3 && Dst3 <= X3, "Incorrect_Range");
    return _mm_permute_ps(x, Dst0 + Dst1 * 4 + Dst2 * 16 + Dst3 * 64);
}
template<VecPos Dst3, VecPos Dst2, VecPos Dst1, VecPos Dst0> static Vc_ALWAYS_INLINE __m256d Vc_CONST shuffle(__m256d x, __m256d y) {
    static_assert(Dst0 >= X0 && Dst1 >= Y0 && Dst2 >= X2 && Dst3 >= Y2, "Incorrect_Range");
    static_assert(Dst0 <= X1 && Dst1 <= Y1 && Dst2 <= X3 && Dst3 <= Y3, "Incorrect_Range");
    return _mm256_shuffle_pd(x, y, Dst0 + (Dst1 - Y0) * 2 + (Dst2 - X2) * 4 + (Dst3 - Y2) * 8);
}
template<VecPos Dst3, VecPos Dst2, VecPos Dst1, VecPos Dst0> static Vc_ALWAYS_INLINE __m256 Vc_CONST shuffle(__m256 x, __m256 y) {
    static_assert(Dst0 >= X0 && Dst1 >= X0 && Dst2 >= Y0 && Dst3 >= Y0, "Incorrect_Range");
    static_assert(Dst0 <= X3 && Dst1 <= X3 && Dst2 <= Y3 && Dst3 <= Y3, "Incorrect_Range");
    return _mm256_shuffle_ps(x, y, Dst0 + Dst1 * 4 + (Dst2 - Y0) * 16 + (Dst3 - Y0) * 64);
}
}  // namespace Reg
}  // namespace Vc

#endif  // VC_AVX_SHUFFLE_H_
@ -0,0 +1,55 @@
/* This file is part of the Vc library. {{{
Copyright © 2014-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef Vc_AVX_SIMD_CAST_CALLER_TCC_
#define Vc_AVX_SIMD_CAST_CALLER_TCC_

#include "macros.h"

namespace Vc_VERSIONED_NAMESPACE
{
#if Vc_IS_VERSION_1
template <typename T>
template <typename U, typename>
Vc_INTRINSIC Vector<T, VectorAbi::Avx>::Vector(U &&x)
    : d(simd_cast<Vector>(std::forward<U>(x)).data())
{
}

template <typename T>
template <typename U>
Vc_INTRINSIC Mask<T, VectorAbi::Avx>::Mask(U &&rhs,
                                           Common::enable_if_mask_converts_explicitly<T, U>)
    : Mask(simd_cast<Mask>(std::forward<U>(rhs)))
{
}
#endif  // Vc_IS_VERSION_1
}

#endif  // Vc_AVX_SIMD_CAST_CALLER_TCC_

// vim: foldmethod=marker
@ -0,0 +1,120 @@
/* This file is part of the Vc library. {{{
Copyright © 2009-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_AVX_TYPES_H_
#define VC_AVX_TYPES_H_

#include "../sse/types.h"
#include "../traits/type_traits.h"
#include "macros.h"

#ifdef Vc_DEFAULT_IMPL_AVX2
#define Vc_DOUBLE_V_SIZE 4
#define Vc_FLOAT_V_SIZE 8
#define Vc_INT_V_SIZE 8
#define Vc_UINT_V_SIZE 8
#define Vc_SHORT_V_SIZE 16
#define Vc_USHORT_V_SIZE 16
#elif defined Vc_DEFAULT_IMPL_AVX
#define Vc_DOUBLE_V_SIZE 4
#define Vc_FLOAT_V_SIZE 8
#define Vc_INT_V_SIZE 4
#define Vc_UINT_V_SIZE 4
#define Vc_SHORT_V_SIZE 8
#define Vc_USHORT_V_SIZE 8
#endif
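// These sizes follow from the register width: AVX provides 256-bit
// floating-point registers (4 doubles, 8 floats) but only 128-bit integer
// operations, so without AVX2 the integer vectors keep their SSE width.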

namespace Vc_VERSIONED_NAMESPACE
{
namespace AVX
{
template <typename T> using Vector = Vc::Vector<T, VectorAbi::Avx1Abi<T>>;
typedef Vector<double> double_v;
typedef Vector<float> float_v;
typedef Vector<int> int_v;
typedef Vector<unsigned int> uint_v;
typedef Vector<short> short_v;
typedef Vector<unsigned short> ushort_v;

template <typename T> using Mask = Vc::Mask<T, VectorAbi::Avx1Abi<T>>;
typedef Mask<double> double_m;
typedef Mask<float> float_m;
typedef Mask<int> int_m;
typedef Mask<unsigned int> uint_m;
typedef Mask<short> short_m;
typedef Mask<unsigned short> ushort_m;

template <typename T> struct Const;

template <typename T> struct is_vector : public std::false_type {};
template <typename T> struct is_vector<Vector<T>> : public std::true_type {};
template <typename T> struct is_mask : public std::false_type {};
template <typename T> struct is_mask<Mask<T>> : public std::true_type {};
}  // namespace AVX

namespace AVX2
{
template <typename T> using Vector = Vc::Vector<T, VectorAbi::Avx>;
using double_v = Vector<double>;
using  float_v = Vector< float>;
using    int_v = Vector<   int>;
using   uint_v = Vector<  uint>;
using  short_v = Vector< short>;
using ushort_v = Vector<ushort>;

template <typename T> using Mask = Vc::Mask<T, VectorAbi::Avx>;
using double_m = Mask<double>;
using  float_m = Mask< float>;
using  llong_m = Mask< llong>;
using ullong_m = Mask<ullong>;
using   long_m = Mask<  long>;
using  ulong_m = Mask< ulong>;
using    int_m = Mask<   int>;
using   uint_m = Mask<  uint>;
using  short_m = Mask< short>;
using ushort_m = Mask<ushort>;
using  schar_m = Mask< schar>;
using  uchar_m = Mask< uchar>;

template <typename T> struct is_vector : public std::false_type {};
template <typename T> struct is_vector<Vector<T>> : public std::true_type {};
template <typename T> struct is_mask : public std::false_type {};
template <typename T> struct is_mask<Mask<T>> : public std::true_type {};
}  // namespace AVX2

namespace Traits
{
template <class T> struct
is_simd_vector_internal<Vector<T, VectorAbi::Avx>>
    : public is_valid_vector_argument<T> {};

template<typename T> struct is_simd_mask_internal<Mask<T, VectorAbi::Avx>>
    : public std::true_type {};
}  // namespace Traits
}  // namespace Vc

#endif  // VC_AVX_TYPES_H_
@ -0,0 +1,545 @@
/* This file is part of the Vc library. {{{
Copyright © 2009-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_AVX_VECTOR_H_
#define VC_AVX_VECTOR_H_

#include "intrinsics.h"
#include "casts.h"
#include "../sse/vector.h"
#include "shuffle.h"
#include "vectorhelper.h"
#include "mask.h"
#include <algorithm>
#include <cmath>
#include "../common/aliasingentryhelper.h"
#include "../common/memoryfwd.h"
#include "../common/where.h"
#include "macros.h"

#ifdef isfinite
#undef isfinite
#endif
#ifdef isnan
#undef isnan
#endif

namespace Vc_VERSIONED_NAMESPACE
{
namespace Detail
{
template <typename T, typename Abi> struct VectorTraits
{
    using mask_type = Vc::Mask<T, Abi>;
    using vector_type = Vc::Vector<T, Abi>;
    using writemasked_vector_type = Common::WriteMaskedVector<vector_type, mask_type>;
    using intrinsic_type = typename AVX::VectorTypeHelper<T>::Type;
};
}  // namespace Detail

#define Vc_CURRENT_CLASS_NAME Vector
template <typename T> class Vector<T, VectorAbi::Avx>
{
public:
    using abi = VectorAbi::Avx;

private:
    using traits_type = Detail::VectorTraits<T, abi>;
    static_assert(
        std::is_arithmetic<T>::value,
        "Vector<T> only accepts arithmetic builtin types as template parameter T.");

    using WriteMaskedVector = typename traits_type::writemasked_vector_type;

public:
    using VectorType = typename traits_type::intrinsic_type;
    using vector_type = VectorType;

    using mask_type = typename traits_type::mask_type;
    using Mask = mask_type;
    using MaskType = mask_type;
    using MaskArg Vc_DEPRECATED_ALIAS("Use MaskArgument instead.") = typename Mask::AsArg;
    using MaskArgument = typename Mask::AsArg;
    using reference = Detail::ElementReference<Vector>;

    Vc_FREE_STORE_OPERATORS_ALIGNED(alignof(VectorType));

    using EntryType = T;
    using value_type = EntryType;
    typedef EntryType VectorEntryType;
    static constexpr size_t Size = sizeof(VectorType) / sizeof(EntryType);
    static constexpr size_t MemoryAlignment = alignof(VectorType);
    using IndexType = fixed_size_simd<int, Size>;
    using index_type = IndexType;
    typedef Vector<T, abi> AsArg;
    typedef VectorType VectorTypeArg;

protected:
    template <typename U> using V = Vector<U, abi>;

    // helper that specializes on VectorType
    typedef AVX::VectorHelper<VectorType> HV;

    // helper that specializes on T
    typedef AVX::VectorHelper<T> HT;

    // cast any m256/m128 to VectorType
    template <typename V> static Vc_INTRINSIC VectorType _cast(V v)
    {
        return AVX::avx_cast<VectorType>(v);
    }

    typedef Common::VectorMemoryUnion<VectorType, EntryType> StorageType;
    StorageType d;

    using WidthT = Common::WidthT<VectorType>;
    // ICC can't compile this:
    // static constexpr WidthT Width = WidthT();

public:
#include "../common/generalinterface.h"

    static Vc_ALWAYS_INLINE_L Vector Random() Vc_ALWAYS_INLINE_R;

    ///////////////////////////////////////////////////////////////////////////////////////////
    // internal: required to enable returning objects of VectorType
    Vc_ALWAYS_INLINE Vector(VectorTypeArg x) : d(x) {}

    // implicit conversion from compatible Vector<U, abi>
    template <typename U>
    Vc_INTRINSIC Vector(
        V<U> x, typename std::enable_if<Traits::is_implicit_cast_allowed<U, T>::value,
                                        void *>::type = nullptr)
        : d(AVX::convert<U, T>(x.data()))
    {
    }

#if Vc_IS_VERSION_1
    // static_cast from the remaining Vector<U, abi>
    template <typename U>
    Vc_DEPRECATED("use simd_cast instead of explicit type casting to convert between "
                  "vector types") Vc_INTRINSIC explicit Vector(
        V<U> x,
        typename std::enable_if<!Traits::is_implicit_cast_allowed<U, T>::value,
                                void *>::type = nullptr)
        : d(Detail::zeroExtendIfNeeded(AVX::convert<U, T>(x.data())))
    {
    }

    // static_cast from other types, implemented via the non-member simd_cast function in
    // simd_cast_caller.tcc
    template <typename U,
              typename = enable_if<Traits::is_simd_vector<U>::value &&
                                   !std::is_same<Vector, Traits::decay<U>>::value>>
    Vc_DEPRECATED("use simd_cast instead of explicit type casting to convert between "
                  "vector types") Vc_INTRINSIC_L
    explicit Vector(U &&x) Vc_INTRINSIC_R;
#endif

    Vc_INTRINSIC explicit Vector(reference a) : Vector(static_cast<EntryType>(a)) {}

    ///////////////////////////////////////////////////////////////////////////////////////////
    // broadcast
    Vc_INTRINSIC Vector(EntryType a) : d(Detail::avx_broadcast(a)) {}
    template <typename U>
    Vc_INTRINSIC Vector(U a,
                        typename std::enable_if<std::is_same<U, int>::value &&
                                                    !std::is_same<U, EntryType>::value,
                                                void *>::type = nullptr)
        : Vector(static_cast<EntryType>(a))
    {
    }

    //template<typename U>
    explicit Vector(std::initializer_list<EntryType>)
    {
        static_assert(std::is_same<EntryType, void>::value,
                      "A SIMD vector object cannot be initialized from an initializer list "
                      "because the number of entries in the vector is target-dependent.");
    }
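    // Note: the condition above is always false for a valid instantiation
    // (EntryType is arithmetic, never void), so constructing from an
    // initializer list is rejected at compile time with the readable message
    // above instead of a cryptic template error.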

#include "../common/loadinterface.h"
#include "../common/storeinterface.h"

    ///////////////////////////////////////////////////////////////////////////////////////////
    // zeroing
    Vc_INTRINSIC_L void setZero() Vc_INTRINSIC_R;
    Vc_INTRINSIC_L void setZero(const Mask &k) Vc_INTRINSIC_R;
    Vc_INTRINSIC_L void setZeroInverted(const Mask &k) Vc_INTRINSIC_R;

    Vc_INTRINSIC_L void setQnan() Vc_INTRINSIC_R;
    Vc_INTRINSIC_L void setQnan(MaskArgument k) Vc_INTRINSIC_R;

#include "../common/gatherinterface.h"
#include "../common/scatterinterface.h"
#if defined Vc_IMPL_AVX2 && !defined Vc_MSVC
    // skip this code for MSVC because it fails to do overload resolution correctly

    ////////////////////////////////////////////////////////////////////////////////
    // non-converting pd, ps, and epi32 gathers
    template <class U, class A, int Scale, int N = Vector<U, A>::size(),
              class = enable_if<(Vector<U, A>::size() >= size() && sizeof(T) >= 4)>>
    Vc_INTRINSIC void gatherImplementation(
        const Common::GatherArguments<T, Vector<U, A>, Scale> &args)
    {
        d.v() = AVX::gather<sizeof(T) * Scale>(
            args.address,
            simd_cast<conditional_t<Size == 4, SSE::int_v, AVX2::int_v>>(args.indexes)
                .data());
    }

    // masked overload
    template <class U, class A, int Scale, int N = Vector<U, A>::size(),
              class = enable_if<(Vector<U, A>::size() >= size() && sizeof(T) >= 4)>>
    Vc_INTRINSIC void gatherImplementation(
        const Common::GatherArguments<T, Vector<U, A>, Scale> &args, MaskArgument k)
    {
        d.v() = AVX::gather<sizeof(T) * Scale>(
            d.v(), k.data(), args.address,
            simd_cast<conditional_t<Size == 4, SSE::int_v, AVX2::int_v>>(args.indexes)
                .data());
    }

    ////////////////////////////////////////////////////////////////////////////////
    // converting (from 8-bit and 16-bit integers only) epi16 gather emulation via
    // epi32 gathers
    template <
        class MT, class U, class A, int Scale,
        class = enable_if<(sizeof(T) == 2 && std::is_integral<MT>::value &&
                           (sizeof(MT) <= 2) && Vector<U, A>::size() >= size())>>
    Vc_INTRINSIC void gatherImplementation(
        const Common::GatherArguments<MT, Vector<U, A>, Scale> &args)
    {
        using AVX2::int_v;
        const auto idx0 = simd_cast<int_v, 0>(args.indexes).data();
        const auto idx1 = simd_cast<int_v, 1>(args.indexes).data();
        *this = simd_cast<Vector>(int_v(AVX::gather<sizeof(MT) * Scale>(
                                      aliasing_cast<int>(args.address), idx0)),
                                  int_v(AVX::gather<sizeof(MT) * Scale>(
                                      aliasing_cast<int>(args.address), idx1)));
        if (sizeof(MT) == 1) {
            if (std::is_signed<MT>::value) {
                using Signed = AVX2::Vector<typename std::make_signed<T>::type>;
                *this = (simd_cast<Signed>(*this) << 8) >> 8;  // sign extend
            } else {
                *this &= 0xff;
            }
        }
    }

    // masked overload
    template <
        class MT, class U, class A, int Scale,
        class = enable_if<(sizeof(T) == 2 && std::is_integral<MT>::value &&
                           (sizeof(MT) <= 2) && Vector<U, A>::size() >= size())>>
    Vc_INTRINSIC void gatherImplementation(
        const Common::GatherArguments<MT, Vector<U, A>, Scale> &args, MaskArgument k)
    {
        using AVX2::int_v;
        const auto idx0 = simd_cast<int_v, 0>(args.indexes).data();
        const auto idx1 = simd_cast<int_v, 1>(args.indexes).data();
        const auto k0 = simd_cast<AVX2::int_m, 0>(k).data();
        const auto k1 = simd_cast<AVX2::int_m, 1>(k).data();
        auto v = simd_cast<Vector>(
            int_v(AVX::gather<sizeof(MT) * Scale>(
                _mm256_setzero_si256(), k0, aliasing_cast<int>(args.address), idx0)),
            int_v(AVX::gather<sizeof(MT) * Scale>(
                _mm256_setzero_si256(), k1, aliasing_cast<int>(args.address), idx1)));
        if (sizeof(MT) == 1) {
            if (std::is_signed<MT>::value) {
                using Signed = AVX2::Vector<typename std::make_signed<T>::type>;
                v = (simd_cast<Signed>(v) << 8) >> 8;  // sign extend
            } else {
                v &= 0xff;
            }
        }
        assign(v, k);
    }

    ////////////////////////////////////////////////////////////////////////////////
    // all remaining converting gathers
    template <class MT, class U, class A, int Scale>
    Vc_INTRINSIC enable_if<((sizeof(T) != 2 || sizeof(MT) > 2) &&
                            Traits::is_valid_vector_argument<MT>::value &&
                            !std::is_same<MT, T>::value &&
                            Vector<U, A>::size() >= size()),
                           void>
    gatherImplementation(const Common::GatherArguments<MT, Vector<U, A>, Scale> &args)
    {
        *this = simd_cast<Vector>(fixed_size_simd<MT, Size>(args));
    }

    // masked overload
    template <class MT, class U, class A, int Scale>
    Vc_INTRINSIC enable_if<((sizeof(T) != 2 || sizeof(MT) > 2) &&
                            Traits::is_valid_vector_argument<MT>::value &&
                            !std::is_same<MT, T>::value &&
                            Vector<U, A>::size() >= size()),
                           void>
    gatherImplementation(const Common::GatherArguments<MT, Vector<U, A>, Scale> &args,
                         MaskArgument k)
    {
        assign(simd_cast<Vector>(fixed_size_simd<MT, Size>(args, k)), k);
    }
#endif  // Vc_IMPL_AVX2 && !MSVC

    ///////////////////////////////////////////////////////////////////////////////////////////
    // prefix
    Vc_ALWAYS_INLINE Vector &operator++() { data() = Detail::add(data(), Detail::one(T()), T()); return *this; }
    Vc_ALWAYS_INLINE Vector &operator--() { data() = Detail::sub(data(), Detail::one(T()), T()); return *this; }
    // postfix
    Vc_ALWAYS_INLINE Vector operator++(int) { const Vector r = *this; data() = Detail::add(data(), Detail::one(T()), T()); return r; }
    Vc_ALWAYS_INLINE Vector operator--(int) { const Vector r = *this; data() = Detail::sub(data(), Detail::one(T()), T()); return r; }

private:
    friend reference;
    Vc_INTRINSIC static value_type get(const Vector &o, int i) noexcept
    {
        return o.d.m(i);
    }
    template <typename U>
    Vc_INTRINSIC static void set(Vector &o, int i, U &&v) noexcept(
        noexcept(std::declval<value_type &>() = v))
    {
        return o.d.set(i, v);
    }

public:
    /**
     * \note the returned object models the concept of a reference and
     * as such it can exist longer than the data it is referencing.
     * \note to avoid lifetime issues, we strongly advise not to store
     * any reference objects.
     */
    Vc_ALWAYS_INLINE reference operator[](size_t index) noexcept
    {
        static_assert(noexcept(reference{std::declval<Vector &>(), int()}), "");
        return {*this, int(index)};
    }
    Vc_ALWAYS_INLINE value_type operator[](size_t index) const noexcept
    {
        return d.m(index);
    }

    Vc_INTRINSIC_L Vc_PURE_L Vector operator[](Permutation::ReversedTag) const Vc_INTRINSIC_R Vc_PURE_R;
    Vc_INTRINSIC_L Vc_PURE_L Vector operator[](const IndexType &perm) const Vc_INTRINSIC_R Vc_PURE_R;

    Vc_INTRINSIC Vc_PURE Mask operator!() const
    {
        return *this == Zero();
    }
    Vc_ALWAYS_INLINE Vector operator~() const
    {
#ifndef Vc_ENABLE_FLOAT_BIT_OPERATORS
        static_assert(std::is_integral<T>::value,
                      "bit-complement can only be used with Vectors of integral type");
#endif
        return Detail::andnot_(data(), Detail::allone<VectorType>());
    }
    Vc_ALWAYS_INLINE_L Vc_PURE_L Vector operator-() const Vc_ALWAYS_INLINE_R Vc_PURE_R;
    Vc_INTRINSIC Vc_PURE Vector operator+() const { return *this; }

    // shifts
#define Vc_OP_VEC(op)                                                                    \
    Vc_INTRINSIC Vector &operator op##=(AsArg x);                                        \
    Vc_INTRINSIC Vc_PURE Vector operator op(AsArg x) const                               \
    {                                                                                    \
        static_assert(                                                                   \
            std::is_integral<T>::value,                                                  \
            "bitwise-operators can only be used with Vectors of integral type");         \
    }
    Vc_ALL_SHIFTS(Vc_OP_VEC);
#undef Vc_OP_VEC

    Vc_ALWAYS_INLINE_L Vector &operator>>=(int x) Vc_ALWAYS_INLINE_R;
    Vc_ALWAYS_INLINE_L Vector &operator<<=(int x) Vc_ALWAYS_INLINE_R;
    Vc_ALWAYS_INLINE_L Vector operator>>(int x) const Vc_ALWAYS_INLINE_R;
    Vc_ALWAYS_INLINE_L Vector operator<<(int x) const Vc_ALWAYS_INLINE_R;

    Vc_DEPRECATED("use isnegative(x) instead") Vc_INTRINSIC Vc_PURE Mask
    isNegative() const
    {
        return Vc::isnegative(*this);
    }

    Vc_ALWAYS_INLINE void assign(const Vector &v, const Mask &mask)
    {
        data() = Detail::blend(data(), v.data(), mask.data());
    }

    template <typename V2>
    Vc_DEPRECATED("Use simd_cast instead of Vector::staticCast") Vc_ALWAYS_INLINE V2
    staticCast() const
    {
        return V2(*this);
    }
    template <typename V2>
    Vc_DEPRECATED("use reinterpret_components_cast instead") Vc_ALWAYS_INLINE V2
    reinterpretCast() const
    {
        return AVX::avx_cast<typename V2::VectorType>(data());
    }

    Vc_ALWAYS_INLINE WriteMaskedVector operator()(const Mask &k)
    {
        return {*this, k};
    }

    Vc_ALWAYS_INLINE VectorType &data() { return d.v(); }
    Vc_ALWAYS_INLINE const VectorType &data() const { return d.v(); }

    template <int Index>
    Vc_INTRINSIC_L Vector broadcast() const Vc_INTRINSIC_R;

    Vc_INTRINSIC_L std::pair<Vector, int> minIndex() const Vc_INTRINSIC_R;
    Vc_INTRINSIC_L std::pair<Vector, int> maxIndex() const Vc_INTRINSIC_R;

    Vc_ALWAYS_INLINE EntryType min() const { return Detail::min(data(), T()); }
    Vc_ALWAYS_INLINE EntryType max() const { return Detail::max(data(), T()); }
    Vc_ALWAYS_INLINE EntryType product() const { return Detail::mul(data(), T()); }
    Vc_ALWAYS_INLINE EntryType sum() const { return Detail::add(data(), T()); }
    Vc_ALWAYS_INLINE_L Vector partialSum() const Vc_ALWAYS_INLINE_R;
    //template<typename BinaryOperation> Vc_ALWAYS_INLINE_L Vector partialSum(BinaryOperation op) const Vc_ALWAYS_INLINE_R;
    Vc_ALWAYS_INLINE_L EntryType min(MaskArgument m) const Vc_ALWAYS_INLINE_R;
    Vc_ALWAYS_INLINE_L EntryType max(MaskArgument m) const Vc_ALWAYS_INLINE_R;
    Vc_ALWAYS_INLINE_L EntryType product(MaskArgument m) const Vc_ALWAYS_INLINE_R;
    Vc_ALWAYS_INLINE_L EntryType sum(MaskArgument m) const Vc_ALWAYS_INLINE_R;

    Vc_INTRINSIC_L Vector shifted(int amount, Vector shiftIn) const Vc_INTRINSIC_R;
    Vc_INTRINSIC_L Vector shifted(int amount) const Vc_INTRINSIC_R;
    Vc_INTRINSIC_L Vector rotated(int amount) const Vc_INTRINSIC_R;
    Vc_INTRINSIC_L Vc_PURE_L Vector reversed() const Vc_INTRINSIC_R Vc_PURE_R;
    Vc_ALWAYS_INLINE_L Vc_PURE_L Vector sorted() const Vc_ALWAYS_INLINE_R Vc_PURE_R;

    template <typename F> void callWithValuesSorted(F &&f)
    {
        EntryType value = d.m(0);
        f(value);
        for (size_t i = 1; i < Size; ++i) {
            if (d.m(i) != value) {
                value = d.m(i);
                f(value);
            }
        }
    }

    template <typename F> Vc_INTRINSIC void call(F &&f) const
    {
        Common::for_all_vector_entries<Size>([&](size_t i) { f(EntryType(d.m(i))); });
    }

    template <typename F> Vc_INTRINSIC void call(F &&f, const Mask &mask) const
    {
        for (size_t i : where(mask)) {
            f(EntryType(d.m(i)));
        }
    }

    template <typename F> Vc_INTRINSIC Vector apply(F &&f) const
    {
        Vector r;
        Common::for_all_vector_entries<Size>(
            [&](size_t i) { r.d.set(i, f(EntryType(d.m(i)))); });
        return r;
    }

    template <typename F> Vc_INTRINSIC Vector apply(F &&f, const Mask &mask) const
    {
        Vector r(*this);
        for (size_t i : where(mask)) {
            r.d.set(i, f(EntryType(r.d.m(i))));
        }
        return r;
    }

    template <typename IndexT> Vc_INTRINSIC void fill(EntryType (&f)(IndexT))
    {
        Common::for_all_vector_entries<Size>([&](size_t i) { d.set(i, f(i)); });
    }
    Vc_INTRINSIC void fill(EntryType (&f)())
    {
        Common::for_all_vector_entries<Size>([&](size_t i) { d.set(i, f()); });
    }

    template <typename G> static Vc_INTRINSIC_L Vector generate(G gen) Vc_INTRINSIC_R;

    Vc_DEPRECATED("use copysign(x, y) instead") Vc_INTRINSIC Vector
    copySign(AsArg x) const
    {
        return Vc::copysign(*this, x);
    }

    Vc_DEPRECATED("use exponent(x) instead") Vc_INTRINSIC Vector exponent() const
    {
        return Vc::exponent(*this);
    }

    Vc_INTRINSIC_L Vector interleaveLow(Vector x) const Vc_INTRINSIC_R;
    Vc_INTRINSIC_L Vector interleaveHigh(Vector x) const Vc_INTRINSIC_R;
};
#undef Vc_CURRENT_CLASS_NAME
template <typename T> constexpr size_t Vector<T, VectorAbi::Avx>::Size;
template <typename T> constexpr size_t Vector<T, VectorAbi::Avx>::MemoryAlignment;
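// The two lines above are the out-of-class definitions for the static
// constexpr members; without them, ODR-using Size or MemoryAlignment (for
// example, binding one of them to a const reference) would fail to link
// under C++11/14.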

#define Vc_CONDITIONAL_ASSIGN(name_, op_)                                                \
    template <Operator O, typename T, typename M, typename U>                            \
    Vc_INTRINSIC enable_if<O == Operator::name_, void> conditional_assign(               \
        AVX2::Vector<T> &lhs, M &&mask, U &&rhs)                                         \
    {                                                                                    \
        lhs(mask) op_ rhs;                                                               \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
Vc_CONDITIONAL_ASSIGN(          Assign,  =);
Vc_CONDITIONAL_ASSIGN(      PlusAssign, +=);
Vc_CONDITIONAL_ASSIGN(     MinusAssign, -=);
Vc_CONDITIONAL_ASSIGN(  MultiplyAssign, *=);
Vc_CONDITIONAL_ASSIGN(    DivideAssign, /=);
Vc_CONDITIONAL_ASSIGN( RemainderAssign, %=);
Vc_CONDITIONAL_ASSIGN(       XorAssign, ^=);
Vc_CONDITIONAL_ASSIGN(       AndAssign, &=);
Vc_CONDITIONAL_ASSIGN(        OrAssign, |=);
Vc_CONDITIONAL_ASSIGN( LeftShiftAssign,<<=);
Vc_CONDITIONAL_ASSIGN(RightShiftAssign,>>=);
#undef Vc_CONDITIONAL_ASSIGN

#define Vc_CONDITIONAL_ASSIGN(name_, expr_)                                              \
    template <Operator O, typename T, typename M>                                        \
    Vc_INTRINSIC enable_if<O == Operator::name_, AVX2::Vector<T>> conditional_assign(    \
        AVX2::Vector<T> &lhs, M &&mask)                                                  \
    {                                                                                    \
        return expr_;                                                                    \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
Vc_CONDITIONAL_ASSIGN(PostIncrement, lhs(mask)++);
Vc_CONDITIONAL_ASSIGN( PreIncrement, ++lhs(mask));
Vc_CONDITIONAL_ASSIGN(PostDecrement, lhs(mask)--);
Vc_CONDITIONAL_ASSIGN( PreDecrement, --lhs(mask));
#undef Vc_CONDITIONAL_ASSIGN

}  // namespace Vc

#include "vector.tcc"
#include "simd_cast.h"

#endif  // VC_AVX_VECTOR_H_
@ -0,0 +1,939 @@
/* This file is part of the Vc library. {{{
Copyright © 2011-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#include "../common/x86_prefetches.h"
#include "../common/gatherimplementation.h"
#include "../common/scatterimplementation.h"
#include "limits.h"
#include "const.h"
#include "../common/set.h"
#include "macros.h"

namespace Vc_VERSIONED_NAMESPACE
{
namespace Detail
{
// compare operators {{{1
Vc_INTRINSIC AVX2::double_m operator==(AVX2::double_v a, AVX2::double_v b) { return AVX::cmpeq_pd(a.data(), b.data()); }
Vc_INTRINSIC AVX2:: float_m operator==(AVX2:: float_v a, AVX2:: float_v b) { return AVX::cmpeq_ps(a.data(), b.data()); }
Vc_INTRINSIC AVX2::double_m operator!=(AVX2::double_v a, AVX2::double_v b) { return AVX::cmpneq_pd(a.data(), b.data()); }
Vc_INTRINSIC AVX2:: float_m operator!=(AVX2:: float_v a, AVX2:: float_v b) { return AVX::cmpneq_ps(a.data(), b.data()); }
Vc_INTRINSIC AVX2::double_m operator>=(AVX2::double_v a, AVX2::double_v b) { return AVX::cmpnlt_pd(a.data(), b.data()); }
Vc_INTRINSIC AVX2:: float_m operator>=(AVX2:: float_v a, AVX2:: float_v b) { return AVX::cmpnlt_ps(a.data(), b.data()); }
Vc_INTRINSIC AVX2::double_m operator<=(AVX2::double_v a, AVX2::double_v b) { return AVX::cmple_pd(a.data(), b.data()); }
Vc_INTRINSIC AVX2:: float_m operator<=(AVX2:: float_v a, AVX2:: float_v b) { return AVX::cmple_ps(a.data(), b.data()); }
Vc_INTRINSIC AVX2::double_m operator> (AVX2::double_v a, AVX2::double_v b) { return AVX::cmpgt_pd(a.data(), b.data()); }
Vc_INTRINSIC AVX2:: float_m operator> (AVX2:: float_v a, AVX2:: float_v b) { return AVX::cmpgt_ps(a.data(), b.data()); }
Vc_INTRINSIC AVX2::double_m operator< (AVX2::double_v a, AVX2::double_v b) { return AVX::cmplt_pd(a.data(), b.data()); }
Vc_INTRINSIC AVX2:: float_m operator< (AVX2:: float_v a, AVX2:: float_v b) { return AVX::cmplt_ps(a.data(), b.data()); }

#ifdef Vc_IMPL_AVX2
Vc_INTRINSIC AVX2::   int_m operator==(AVX2::   int_v a, AVX2::   int_v b) { return AVX::cmpeq_epi32(a.data(), b.data()); }
Vc_INTRINSIC AVX2::  uint_m operator==(AVX2::  uint_v a, AVX2::  uint_v b) { return AVX::cmpeq_epi32(a.data(), b.data()); }
Vc_INTRINSIC AVX2:: short_m operator==(AVX2:: short_v a, AVX2:: short_v b) { return AVX::cmpeq_epi16(a.data(), b.data()); }
Vc_INTRINSIC AVX2::ushort_m operator==(AVX2::ushort_v a, AVX2::ushort_v b) { return AVX::cmpeq_epi16(a.data(), b.data()); }
Vc_INTRINSIC AVX2::   int_m operator!=(AVX2::   int_v a, AVX2::   int_v b) { return not_(AVX::cmpeq_epi32(a.data(), b.data())); }
Vc_INTRINSIC AVX2::  uint_m operator!=(AVX2::  uint_v a, AVX2::  uint_v b) { return not_(AVX::cmpeq_epi32(a.data(), b.data())); }
Vc_INTRINSIC AVX2:: short_m operator!=(AVX2:: short_v a, AVX2:: short_v b) { return not_(AVX::cmpeq_epi16(a.data(), b.data())); }
Vc_INTRINSIC AVX2::ushort_m operator!=(AVX2::ushort_v a, AVX2::ushort_v b) { return not_(AVX::cmpeq_epi16(a.data(), b.data())); }
Vc_INTRINSIC AVX2::   int_m operator>=(AVX2::   int_v a, AVX2::   int_v b) { return not_(AVX::cmplt_epi32(a.data(), b.data())); }
Vc_INTRINSIC AVX2::  uint_m operator>=(AVX2::  uint_v a, AVX2::  uint_v b) { return not_(AVX::cmplt_epu32(a.data(), b.data())); }
Vc_INTRINSIC AVX2:: short_m operator>=(AVX2:: short_v a, AVX2:: short_v b) { return not_(AVX::cmplt_epi16(a.data(), b.data())); }
Vc_INTRINSIC AVX2::ushort_m operator>=(AVX2::ushort_v a, AVX2::ushort_v b) { return not_(AVX::cmplt_epu16(a.data(), b.data())); }
Vc_INTRINSIC AVX2::   int_m operator<=(AVX2::   int_v a, AVX2::   int_v b) { return not_(AVX::cmpgt_epi32(a.data(), b.data())); }
Vc_INTRINSIC AVX2::  uint_m operator<=(AVX2::  uint_v a, AVX2::  uint_v b) { return not_(AVX::cmpgt_epu32(a.data(), b.data())); }
Vc_INTRINSIC AVX2:: short_m operator<=(AVX2:: short_v a, AVX2:: short_v b) { return not_(AVX::cmpgt_epi16(a.data(), b.data())); }
Vc_INTRINSIC AVX2::ushort_m operator<=(AVX2::ushort_v a, AVX2::ushort_v b) { return not_(AVX::cmpgt_epu16(a.data(), b.data())); }
Vc_INTRINSIC AVX2::   int_m operator> (AVX2::   int_v a, AVX2::   int_v b) { return AVX::cmpgt_epi32(a.data(), b.data()); }
Vc_INTRINSIC AVX2::  uint_m operator> (AVX2::  uint_v a, AVX2::  uint_v b) { return AVX::cmpgt_epu32(a.data(), b.data()); }
Vc_INTRINSIC AVX2:: short_m operator> (AVX2:: short_v a, AVX2:: short_v b) { return AVX::cmpgt_epi16(a.data(), b.data()); }
Vc_INTRINSIC AVX2::ushort_m operator> (AVX2::ushort_v a, AVX2::ushort_v b) { return AVX::cmpgt_epu16(a.data(), b.data()); }
Vc_INTRINSIC AVX2::   int_m operator< (AVX2::   int_v a, AVX2::   int_v b) { return AVX::cmplt_epi32(a.data(), b.data()); }
Vc_INTRINSIC AVX2::  uint_m operator< (AVX2::  uint_v a, AVX2::  uint_v b) { return AVX::cmplt_epu32(a.data(), b.data()); }
Vc_INTRINSIC AVX2:: short_m operator< (AVX2:: short_v a, AVX2:: short_v b) { return AVX::cmplt_epi16(a.data(), b.data()); }
Vc_INTRINSIC AVX2::ushort_m operator< (AVX2::ushort_v a, AVX2::ushort_v b) { return AVX::cmplt_epu16(a.data(), b.data()); }
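// Note: SSE/AVX2 provide no unsigned integer compare instructions; the
// AVX::cmp*_epu16/epu32 helpers used above are presumably implemented in
// terms of the signed compares (for example, by flipping the sign bit of
// both operands), which is the usual technique.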
#endif  // Vc_IMPL_AVX2

// bitwise operators {{{1
template <typename T>
Vc_INTRINSIC AVX2::Vector<T> operator^(AVX2::Vector<T> a, AVX2::Vector<T> b)
{
    return xor_(a.data(), b.data());
}
template <typename T>
Vc_INTRINSIC AVX2::Vector<T> operator&(AVX2::Vector<T> a, AVX2::Vector<T> b)
{
    return and_(a.data(), b.data());
}
template <typename T>
Vc_INTRINSIC AVX2::Vector<T> operator|(AVX2::Vector<T> a, AVX2::Vector<T> b)
{
    return or_(a.data(), b.data());
}
// }}}1
// arithmetic operators {{{1
template <typename T>
Vc_INTRINSIC AVX2::Vector<T> operator+(AVX2::Vector<T> a, AVX2::Vector<T> b)
{
    return add(a.data(), b.data(), T());
}
template <typename T>
Vc_INTRINSIC AVX2::Vector<T> operator-(AVX2::Vector<T> a, AVX2::Vector<T> b)
{
    return sub(a.data(), b.data(), T());
}
template <typename T>
Vc_INTRINSIC AVX2::Vector<T> operator*(AVX2::Vector<T> a, AVX2::Vector<T> b)
{
    return mul(a.data(), b.data(), T());
}
template <typename T>
Vc_INTRINSIC AVX2::Vector<T> operator/(AVX2::Vector<T> a, AVX2::Vector<T> b)
{
    return div(a.data(), b.data(), T());
}
Vc_INTRINSIC AVX2::Vector<ushort> operator/(AVX2::Vector<ushort> a,
                                            AVX2::Vector<ushort> b)
{
    using namespace AVX;
    const __m256 lo = _mm256_div_ps(convert<ushort, float>(lo128(a.data())),
                                    convert<ushort, float>(lo128(b.data())));
    const __m256 hi = _mm256_div_ps(convert<ushort, float>(hi128(a.data())),
                                    convert<ushort, float>(hi128(b.data())));
    const float_v threshold = 32767.f;
    using Detail::operator>;
    const __m128i loShort = (Vc_IS_UNLIKELY((float_v(lo) > threshold).isNotEmpty()))
                                ? convert<float, ushort>(lo)
                                : convert<float, short>(lo);
    const __m128i hiShort = (Vc_IS_UNLIKELY((float_v(hi) > threshold).isNotEmpty()))
                                ? convert<float, ushort>(hi)
                                : convert<float, short>(hi);
    return concat(loShort, hiShort);
}
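// The threshold above exists because the cheap float->short conversion used
// in the fast path only round-trips values up to 32767; the float->ushort
// conversion is taken only when a quotient actually exceeds the signed
// 16-bit range.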
template <typename T>
Vc_INTRINSIC enable_if<std::is_integral<T>::value, AVX2::Vector<T>> operator%(
    AVX2::Vector<T> a, AVX2::Vector<T> b)
{
    return a - a / b * b;
}
// }}}1
}  // namespace Detail
///////////////////////////////////////////////////////////////////////////////////////////
// generate {{{1
template <> template <typename G> Vc_INTRINSIC AVX2::double_v AVX2::double_v::generate(G gen)
{
    const auto tmp0 = gen(0);
    const auto tmp1 = gen(1);
    const auto tmp2 = gen(2);
    const auto tmp3 = gen(3);
    return _mm256_setr_pd(tmp0, tmp1, tmp2, tmp3);
}
template <> template <typename G> Vc_INTRINSIC AVX2::float_v AVX2::float_v::generate(G gen)
{
    const auto tmp0 = gen(0);
    const auto tmp1 = gen(1);
    const auto tmp2 = gen(2);
    const auto tmp3 = gen(3);
    const auto tmp4 = gen(4);
    const auto tmp5 = gen(5);
    const auto tmp6 = gen(6);
    const auto tmp7 = gen(7);
    return _mm256_setr_ps(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7);
}
#ifdef Vc_IMPL_AVX2
template <> template <typename G> Vc_INTRINSIC AVX2::int_v AVX2::int_v::generate(G gen)
{
    const auto tmp0 = gen(0);
    const auto tmp1 = gen(1);
    const auto tmp2 = gen(2);
    const auto tmp3 = gen(3);
    const auto tmp4 = gen(4);
    const auto tmp5 = gen(5);
    const auto tmp6 = gen(6);
    const auto tmp7 = gen(7);
    return _mm256_setr_epi32(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7);
}
template <> template <typename G> Vc_INTRINSIC AVX2::uint_v AVX2::uint_v::generate(G gen)
{
    const auto tmp0 = gen(0);
    const auto tmp1 = gen(1);
    const auto tmp2 = gen(2);
    const auto tmp3 = gen(3);
    const auto tmp4 = gen(4);
    const auto tmp5 = gen(5);
    const auto tmp6 = gen(6);
    const auto tmp7 = gen(7);
    return _mm256_setr_epi32(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7);
}
template <> template <typename G> Vc_INTRINSIC AVX2::short_v AVX2::short_v::generate(G gen)
{
    const auto tmp0 = gen(0);
    const auto tmp1 = gen(1);
    const auto tmp2 = gen(2);
    const auto tmp3 = gen(3);
    const auto tmp4 = gen(4);
    const auto tmp5 = gen(5);
    const auto tmp6 = gen(6);
    const auto tmp7 = gen(7);
    const auto tmp8 = gen(8);
    const auto tmp9 = gen(9);
    const auto tmp10 = gen(10);
    const auto tmp11 = gen(11);
    const auto tmp12 = gen(12);
    const auto tmp13 = gen(13);
    const auto tmp14 = gen(14);
    const auto tmp15 = gen(15);
    return _mm256_setr_epi16(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9, tmp10, tmp11, tmp12, tmp13, tmp14, tmp15);
}
template <> template <typename G> Vc_INTRINSIC AVX2::ushort_v AVX2::ushort_v::generate(G gen)
{
    const auto tmp0 = gen(0);
    const auto tmp1 = gen(1);
    const auto tmp2 = gen(2);
    const auto tmp3 = gen(3);
    const auto tmp4 = gen(4);
    const auto tmp5 = gen(5);
    const auto tmp6 = gen(6);
    const auto tmp7 = gen(7);
    const auto tmp8 = gen(8);
    const auto tmp9 = gen(9);
    const auto tmp10 = gen(10);
    const auto tmp11 = gen(11);
    const auto tmp12 = gen(12);
    const auto tmp13 = gen(13);
    const auto tmp14 = gen(14);
    const auto tmp15 = gen(15);
    return _mm256_setr_epi16(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9, tmp10, tmp11, tmp12, tmp13, tmp14, tmp15);
}
#endif

// constants {{{1
template <typename T> Vc_INTRINSIC Vector<T, VectorAbi::Avx>::Vector(VectorSpecialInitializerZero) : d{} {}

template <> Vc_INTRINSIC Vector<double, VectorAbi::Avx>::Vector(VectorSpecialInitializerOne) : d(AVX::setone_pd()) {}
template <> Vc_INTRINSIC Vector< float, VectorAbi::Avx>::Vector(VectorSpecialInitializerOne) : d(AVX::setone_ps()) {}
#ifdef Vc_IMPL_AVX2
template <> Vc_INTRINSIC Vector<   int, VectorAbi::Avx>::Vector(VectorSpecialInitializerOne) : d(AVX::setone_epi32()) {}
template <> Vc_INTRINSIC Vector<  uint, VectorAbi::Avx>::Vector(VectorSpecialInitializerOne) : d(AVX::setone_epu32()) {}
template <> Vc_INTRINSIC Vector< short, VectorAbi::Avx>::Vector(VectorSpecialInitializerOne) : d(AVX::setone_epi16()) {}
template <> Vc_INTRINSIC Vector<ushort, VectorAbi::Avx>::Vector(VectorSpecialInitializerOne) : d(AVX::setone_epu16()) {}
template <> Vc_INTRINSIC Vector< schar, VectorAbi::Avx>::Vector(VectorSpecialInitializerOne) : d(AVX::setone_epi8()) {}
template <> Vc_INTRINSIC Vector< uchar, VectorAbi::Avx>::Vector(VectorSpecialInitializerOne) : d(AVX::setone_epu8()) {}
#endif

template <typename T>
Vc_ALWAYS_INLINE Vector<T, VectorAbi::Avx>::Vector(
    VectorSpecialInitializerIndexesFromZero)
    : Vector(AVX::IndexesFromZeroData<T>::address(), Vc::Aligned)
{
}

template <>
Vc_ALWAYS_INLINE Vector<float, VectorAbi::Avx>::Vector(VectorSpecialInitializerIndexesFromZero)
    : Vector(AVX::IndexesFromZeroData<int>::address(), Vc::Aligned)
{
}
template <>
Vc_ALWAYS_INLINE Vector<double, VectorAbi::Avx>::Vector(VectorSpecialInitializerIndexesFromZero)
    : Vector(AVX::IndexesFromZeroData<int>::address(), Vc::Aligned)
{
}

///////////////////////////////////////////////////////////////////////////////////////////
// load member functions {{{1
// general load, implemented via LoadHelper {{{2
template <typename DstT>
template <typename SrcT, typename Flags>
Vc_INTRINSIC typename Vector<DstT, VectorAbi::Avx>::
#ifndef Vc_MSVC
    template
#endif
    load_concept<SrcT, Flags>::type Vector<DstT, VectorAbi::Avx>::load(const SrcT *mem, Flags flags)
{
    Common::handleLoadPrefetches(mem, flags);
    d.v() = Detail::load<VectorType, DstT>(mem, flags);
}

///////////////////////////////////////////////////////////////////////////////////////////
// zeroing {{{1
template<typename T> Vc_INTRINSIC void Vector<T, VectorAbi::Avx>::setZero()
{
    data() = Detail::zero<VectorType>();
}
template<typename T> Vc_INTRINSIC void Vector<T, VectorAbi::Avx>::setZero(const Mask &k)
{
    data() = Detail::andnot_(k.data(), data());
}
template<typename T> Vc_INTRINSIC void Vector<T, VectorAbi::Avx>::setZeroInverted(const Mask &k)
{
    data() = Detail::and_(k.data(), data());
}

template<> Vc_INTRINSIC void Vector<double, VectorAbi::Avx>::setQnan()
{
    data() = Detail::allone<VectorType>();
}
template<> Vc_INTRINSIC void Vector<double, VectorAbi::Avx>::setQnan(MaskArgument k)
{
    data() = _mm256_or_pd(data(), k.dataD());
}
template<> Vc_INTRINSIC void Vector<float, VectorAbi::Avx>::setQnan()
{
    data() = Detail::allone<VectorType>();
}
template<> Vc_INTRINSIC void Vector<float, VectorAbi::Avx>::setQnan(MaskArgument k)
{
    data() = _mm256_or_ps(data(), k.dataF());
}

///////////////////////////////////////////////////////////////////////////////////////////
// stores {{{1
template <typename T>
template <typename U,
          typename Flags,
          typename>
Vc_INTRINSIC void Vector<T, VectorAbi::Avx>::store(U *mem, Flags flags) const
{
    Common::handleStorePrefetches(mem, flags);
    HV::template store<Flags>(mem, data());
}

template <typename T>
template <typename U,
          typename Flags,
          typename>
Vc_INTRINSIC void Vector<T, VectorAbi::Avx>::store(U *mem, Mask mask, Flags flags) const
{
    Common::handleStorePrefetches(mem, flags);
    HV::template store<Flags>(mem, data(), mask.data());
}

///////////////////////////////////////////////////////////////////////////////////////////
// integer ops {{{1
#ifdef Vc_IMPL_AVX2
template <> Vc_ALWAYS_INLINE AVX2::Vector<   int> Vector<   int, VectorAbi::Avx>::operator<<(AsArg x) const { return _mm256_sllv_epi32(d.v(), x.d.v()); }
template <> Vc_ALWAYS_INLINE AVX2::Vector<  uint> Vector<  uint, VectorAbi::Avx>::operator<<(AsArg x) const { return _mm256_sllv_epi32(d.v(), x.d.v()); }
template <> Vc_ALWAYS_INLINE AVX2::Vector<   int> Vector<   int, VectorAbi::Avx>::operator>>(AsArg x) const { return _mm256_srav_epi32(d.v(), x.d.v()); }
|
||||
template <> Vc_ALWAYS_INLINE AVX2::Vector< uint> Vector< uint, VectorAbi::Avx>::operator>>(AsArg x) const { return _mm256_srlv_epi32(d.v(), x.d.v()); }
|
||||
template <> Vc_ALWAYS_INLINE AVX2::Vector< short> Vector< short, VectorAbi::Avx>::operator<<(AsArg x) const { return generate([&](int i) { return get(*this, i) << get(x, i); }); }
|
||||
template <> Vc_ALWAYS_INLINE AVX2::Vector<ushort> Vector<ushort, VectorAbi::Avx>::operator<<(AsArg x) const { return generate([&](int i) { return get(*this, i) << get(x, i); }); }
|
||||
template <> Vc_ALWAYS_INLINE AVX2::Vector< short> Vector< short, VectorAbi::Avx>::operator>>(AsArg x) const { return generate([&](int i) { return get(*this, i) >> get(x, i); }); }
|
||||
template <> Vc_ALWAYS_INLINE AVX2::Vector<ushort> Vector<ushort, VectorAbi::Avx>::operator>>(AsArg x) const { return generate([&](int i) { return get(*this, i) >> get(x, i); }); }
|
||||
template <typename T>
|
||||
Vc_ALWAYS_INLINE AVX2::Vector<T> &Vector<T, VectorAbi::Avx>::operator<<=(AsArg x)
|
||||
{
|
||||
static_assert(std::is_integral<T>::value,
|
||||
"bitwise-operators can only be used with Vectors of integral type");
|
||||
return *this = *this << x;
|
||||
}
|
||||
template <typename T>
|
||||
Vc_ALWAYS_INLINE AVX2::Vector<T> &Vector<T, VectorAbi::Avx>::operator>>=(AsArg x)
|
||||
{
|
||||
static_assert(std::is_integral<T>::value,
|
||||
"bitwise-operators can only be used with Vectors of integral type");
|
||||
return *this = *this >> x;
|
||||
}
|
||||
#endif
|
||||
|
||||
template<typename T> Vc_ALWAYS_INLINE AVX2::Vector<T> &Vector<T, VectorAbi::Avx>::operator>>=(int shift) {
|
||||
d.v() = Detail::shiftRight(d.v(), shift, T());
|
||||
return *static_cast<AVX2::Vector<T> *>(this);
|
||||
}
|
||||
template<typename T> Vc_ALWAYS_INLINE Vc_PURE AVX2::Vector<T> Vector<T, VectorAbi::Avx>::operator>>(int shift) const {
|
||||
return Detail::shiftRight(d.v(), shift, T());
|
||||
}
|
||||
template<typename T> Vc_ALWAYS_INLINE AVX2::Vector<T> &Vector<T, VectorAbi::Avx>::operator<<=(int shift) {
|
||||
d.v() = Detail::shiftLeft(d.v(), shift, T());
|
||||
return *static_cast<AVX2::Vector<T> *>(this);
|
||||
}
|
||||
template<typename T> Vc_ALWAYS_INLINE Vc_PURE AVX2::Vector<T> Vector<T, VectorAbi::Avx>::operator<<(int shift) const {
|
||||
return Detail::shiftLeft(d.v(), shift, T());
|
||||
}
|
||||
|
||||
// isnegative {{{1
|
||||
Vc_INTRINSIC Vc_CONST AVX2::float_m isnegative(AVX2::float_v x)
|
||||
{
|
||||
return AVX::avx_cast<__m256>(AVX::srai_epi32<31>(
|
||||
AVX::avx_cast<__m256i>(_mm256_and_ps(AVX::setsignmask_ps(), x.data()))));
|
||||
}
|
||||
Vc_INTRINSIC Vc_CONST AVX2::double_m isnegative(AVX2::double_v x)
|
||||
{
|
||||
return Mem::permute<X1, X1, X3, X3>(AVX::avx_cast<__m256>(AVX::srai_epi32<31>(
|
||||
AVX::avx_cast<__m256i>(_mm256_and_pd(AVX::setsignmask_pd(), x.data())))));
|
||||
}
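// Note added for clarity (not part of the original source): isnegative() inspects
// the IEEE-754 sign bit directly. The bit is isolated with the sign mask and then
// smeared across the whole lane by the arithmetic right shift (srai) by 31, so a
// lane becomes all-ones (true) exactly when the input carries the sign bit.
// Unlike a `x < 0` comparison this also classifies negative zero as negative:
//     isnegative(AVX2::float_v(-0.f));                 // every element true
//     AVX2::float_v(-0.f) < AVX2::float_v::Zero();     // every element false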
// gathers {{{1
#define Vc_GATHER_IMPL(V_)                                                               \
    template <>                                                                          \
    template <class MT, class IT, int Scale>                                             \
    inline void AVX2::V_::gatherImplementation(                                          \
        const Common::GatherArguments<MT, IT, Scale> &args)
#define Vc_M(i_) static_cast<value_type>(args.address[Scale * args.indexes[i_]])
Vc_GATHER_IMPL(double_v) { d.v() = _mm256_setr_pd(Vc_M(0), Vc_M(1), Vc_M(2), Vc_M(3)); }

Vc_GATHER_IMPL(float_v)
{
    d.v() = _mm256_setr_ps(Vc_M(0), Vc_M(1), Vc_M(2), Vc_M(3), Vc_M(4), Vc_M(5), Vc_M(6),
                           Vc_M(7));
}

#ifdef Vc_IMPL_AVX2
Vc_GATHER_IMPL(int_v)
{
    d.v() = _mm256_setr_epi32(Vc_M(0), Vc_M(1), Vc_M(2), Vc_M(3), Vc_M(4), Vc_M(5),
                              Vc_M(6), Vc_M(7));
}

Vc_GATHER_IMPL(uint_v)
{
    d.v() = _mm256_setr_epi32(Vc_M(0), Vc_M(1), Vc_M(2), Vc_M(3), Vc_M(4), Vc_M(5),
                              Vc_M(6), Vc_M(7));
}

Vc_GATHER_IMPL(short_v)
{
    d.v() = _mm256_setr_epi16(Vc_M(0), Vc_M(1), Vc_M(2), Vc_M(3), Vc_M(4), Vc_M(5),
                              Vc_M(6), Vc_M(7), Vc_M(8), Vc_M(9), Vc_M(10), Vc_M(11),
                              Vc_M(12), Vc_M(13), Vc_M(14), Vc_M(15));
}

Vc_GATHER_IMPL(ushort_v)
{
    d.v() = _mm256_setr_epi16(Vc_M(0), Vc_M(1), Vc_M(2), Vc_M(3), Vc_M(4), Vc_M(5),
                              Vc_M(6), Vc_M(7), Vc_M(8), Vc_M(9), Vc_M(10), Vc_M(11),
                              Vc_M(12), Vc_M(13), Vc_M(14), Vc_M(15));
}
#endif
#undef Vc_M
#undef Vc_GATHER_IMPL

template <class T>
template <class MT, class IT, int Scale>
inline void Vector<T, VectorAbi::Avx>::gatherImplementation(
    const Common::GatherArguments<MT, IT, Scale> &args, MaskArgument mask)
{
    const auto *mem = args.address;
    const auto indexes = Scale * args.indexes;
    using Selector = std::integral_constant < Common::GatherScatterImplementation,
#ifdef Vc_USE_SET_GATHERS
          Traits::is_simd_vector<IT>::value ? Common::GatherScatterImplementation::SetIndexZero :
#endif
#ifdef Vc_USE_BSF_GATHERS
          Common::GatherScatterImplementation::BitScanLoop
#elif defined Vc_USE_POPCNT_BSF_GATHERS
          Common::GatherScatterImplementation::PopcntSwitch
#else
          Common::GatherScatterImplementation::SimpleLoop
#endif
          > ;
    Common::executeGather(Selector(), *this, mem, indexes, mask);
}
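// Note added for clarity (not part of the original source): the Selector above
// fixes the masked-gather strategy at compile time from build flags. The default,
// SimpleLoop, behaves roughly like
//     for (size_t i = 0; i < Size; ++i) {
//         if (mask[i]) (*this)[i] = static_cast<EntryType>(mem[indexes[i]]);
//     }
// while the BitScanLoop/PopcntSwitch variants iterate only over the set mask bits.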
template <typename T>
template <typename MT, typename IT>
inline void Vector<T, VectorAbi::Avx>::scatterImplementation(MT *mem, IT &&indexes) const
{
    Common::unrolled_loop<std::size_t, 0, Size>([&](std::size_t i) { mem[indexes[i]] = d.m(i); });
}

template <typename T>
template <typename MT, typename IT>
inline void Vector<T, VectorAbi::Avx>::scatterImplementation(MT *mem, IT &&indexes, MaskArgument mask) const
{
    using Selector = std::integral_constant < Common::GatherScatterImplementation,
#ifdef Vc_USE_SET_GATHERS
          Traits::is_simd_vector<IT>::value ? Common::GatherScatterImplementation::SetIndexZero :
#endif
#ifdef Vc_USE_BSF_GATHERS
          Common::GatherScatterImplementation::BitScanLoop
#elif defined Vc_USE_POPCNT_BSF_GATHERS
          Common::GatherScatterImplementation::PopcntSwitch
#else
          Common::GatherScatterImplementation::SimpleLoop
#endif
          > ;
    Common::executeScatter(Selector(), *this, mem, std::forward<IT>(indexes), mask);
}

///////////////////////////////////////////////////////////////////////////////////////////
// operator- {{{1
#ifdef Vc_USE_BUILTIN_VECTOR_TYPES
template<typename T> Vc_ALWAYS_INLINE Vc_PURE AVX2::Vector<T> Vector<T, VectorAbi::Avx>::operator-() const
{
    return VectorType(-d.builtin());
}
#else
template<typename T> Vc_ALWAYS_INLINE Vc_PURE AVX2::Vector<T> Vector<T, VectorAbi::Avx>::operator-() const
{
    return Detail::negate(d.v(), std::integral_constant<std::size_t, sizeof(T)>());
}
#endif

///////////////////////////////////////////////////////////////////////////////////////////
// horizontal ops {{{1
template <typename T>
Vc_INTRINSIC std::pair<Vector<T, VectorAbi::Avx>, int>
Vector<T, VectorAbi::Avx>::minIndex() const
{
    AVX2::Vector<T> x = min();
    return std::make_pair(x, (*this == x).firstOne());
}
template <typename T>
Vc_INTRINSIC std::pair<Vector<T, VectorAbi::Avx>, int>
Vector<T, VectorAbi::Avx>::maxIndex() const
{
    AVX2::Vector<T> x = max();
    return std::make_pair(x, (*this == x).firstOne());
}
template <> Vc_INTRINSIC std::pair<AVX2::float_v, int> AVX2::float_v::minIndex() const
{
    /*
    // 28 cycles latency:
    __m256 x = _mm256_min_ps(Mem::permute128<X1, X0>(d.v()), d.v());
    x = _mm256_min_ps(x, Reg::permute<X2, X3, X0, X1>(x));
    AVX2::float_v xx = _mm256_min_ps(x, Reg::permute<X1, X0, X3, X2>(x));
    AVX2::uint_v idx = AVX2::uint_v::IndexesFromZero();
    idx = _mm256_castps_si256(
        _mm256_or_ps((*this != xx).data(), _mm256_castsi256_ps(idx.data())));
    return std::make_pair(xx, (*this == xx).firstOne());

    __m128 loData = AVX::lo128(d.v());
    __m128 hiData = AVX::hi128(d.v());
    const __m128 less2 = _mm_cmplt_ps(hiData, loData);
    loData = _mm_min_ps(loData, hiData);
    hiData = Mem::permute<X2, X3, X0, X1>(loData);
    const __m128 less1 = _mm_cmplt_ps(hiData, loData);
    loData = _mm_min_ps(loData, hiData);
    hiData = Mem::permute<X1, X0, X3, X2>(loData);
    const __m128 less0 = _mm_cmplt_ps(hiData, loData);
    unsigned bits = _mm_movemask_ps(less0) & 0x1;
    bits |= ((_mm_movemask_ps(less1) << 1) - bits) & 0x2;
    bits |= ((_mm_movemask_ps(less2) << 3) - bits) & 0x4;
    loData = _mm_min_ps(loData, hiData);
    return std::make_pair(AVX::concat(loData, loData), bits);
    */

    // 28 cycles latency:
    __m256 x = d.v();
    __m256 idx = Vector<float>::IndexesFromZero().data();
    __m256 y = Mem::permute128<X1, X0>(x);
    __m256 idy = Mem::permute128<X1, X0>(idx);
    __m256 less = AVX::cmplt_ps(x, y);

    x = _mm256_blendv_ps(y, x, less);
    idx = _mm256_blendv_ps(idy, idx, less);
    y = Reg::permute<X2, X3, X0, X1>(x);
    idy = Reg::permute<X2, X3, X0, X1>(idx);
    less = AVX::cmplt_ps(x, y);

    x = _mm256_blendv_ps(y, x, less);
    idx = _mm256_blendv_ps(idy, idx, less);
    y = Reg::permute<X1, X0, X3, X2>(x);
    idy = Reg::permute<X1, X0, X3, X2>(idx);
    less = AVX::cmplt_ps(x, y);

    idx = _mm256_blendv_ps(idy, idx, less);

    const auto index = _mm_cvtsi128_si32(AVX::avx_cast<__m128i>(idx));
#ifdef Vc_GNU_ASM
    __asm__ __volatile__(""); // help GCC to order the instructions better
#endif
    x = _mm256_blendv_ps(y, x, less);
    return std::make_pair(x, index);
}
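// Note added for clarity (not part of the original source): the float_v::minIndex()
// specialization above is a blend-based argmin. An index vector idx travels
// alongside the value vector x through three reduction steps (swap 128-bit halves,
// swap 64-bit pairs, swap neighbors); at each step both x and idx are blended
// under the same `less` mask, so after log2(8) = 3 steps lane 0 holds the minimum
// together with its original position.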
template<typename T> Vc_ALWAYS_INLINE AVX2::Vector<T> Vector<T, VectorAbi::Avx>::partialSum() const
{
    //   a    b    c    d    e    f    g    h
    // +      a    b    c    d    e    f    g    -> a ab bc cd de ef fg gh
    // +           a    ab   bc   cd   de   ef   -> a ab abc abcd bcde cdef defg efgh
    // +                     a    ab   abc  abcd -> a ab abc abcd abcde abcdef abcdefg abcdefgh
    AVX2::Vector<T> tmp = *this;
    if (Size > 1) tmp += tmp.shifted(-1);
    if (Size > 2) tmp += tmp.shifted(-2);
    if (Size > 4) tmp += tmp.shifted(-4);
    if (Size > 8) tmp += tmp.shifted(-8);
    if (Size > 16) tmp += tmp.shifted(-16);
    return tmp;
}
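// Usage sketch (illustrative, not part of the original source): partialSum() is
// an inclusive prefix sum computed in log2(Size) shifted adds, as the diagram
// above shows (a Hillis-Steele scan). Assuming an AVX2 build:
//     AVX2::int_v v(1);               // { 1, 1, 1, 1, 1, 1, 1, 1 }
//     AVX2::int_v s = v.partialSum(); // { 1, 2, 3, 4, 5, 6, 7, 8 }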
/* This function requires correct masking because the neutral element of \p op is not necessarily 0
 *
template<typename T> template<typename BinaryOperation> Vc_ALWAYS_INLINE AVX2::Vector<T> Vector<T, VectorAbi::Avx>::partialSum(BinaryOperation op) const
{
    //   a    b    c    d    e    f    g    h
    // +      a    b    c    d    e    f    g    -> a ab bc cd de ef fg gh
    // +           a    ab   bc   cd   de   ef   -> a ab abc abcd bcde cdef defg efgh
    // +                     a    ab   abc  abcd -> a ab abc abcd abcde abcdef abcdefg abcdefgh
    AVX2::Vector<T> tmp = *this;
    Mask mask(true);
    if (Size > 1) tmp(mask) = op(tmp, tmp.shifted(-1));
    if (Size > 2) tmp(mask) = op(tmp, tmp.shifted(-2));
    if (Size > 4) tmp(mask) = op(tmp, tmp.shifted(-4));
    if (Size > 8) tmp(mask) = op(tmp, tmp.shifted(-8));
    if (Size > 16) tmp(mask) = op(tmp, tmp.shifted(-16));
    return tmp;
}
*/

template<typename T> Vc_ALWAYS_INLINE typename Vector<T, VectorAbi::Avx>::EntryType Vector<T, VectorAbi::Avx>::min(MaskArgument m) const
{
    AVX2::Vector<T> tmp = std::numeric_limits<AVX2::Vector<T> >::max();
    tmp(m) = *this;
    return tmp.min();
}
template<typename T> Vc_ALWAYS_INLINE typename Vector<T, VectorAbi::Avx>::EntryType Vector<T, VectorAbi::Avx>::max(MaskArgument m) const
{
    AVX2::Vector<T> tmp = std::numeric_limits<AVX2::Vector<T> >::min();
    tmp(m) = *this;
    return tmp.max();
}
template<typename T> Vc_ALWAYS_INLINE typename Vector<T, VectorAbi::Avx>::EntryType Vector<T, VectorAbi::Avx>::product(MaskArgument m) const
{
    AVX2::Vector<T> tmp(Vc::One);
    tmp(m) = *this;
    return tmp.product();
}
template<typename T> Vc_ALWAYS_INLINE typename Vector<T, VectorAbi::Avx>::EntryType Vector<T, VectorAbi::Avx>::sum(MaskArgument m) const
{
    AVX2::Vector<T> tmp(Vc::Zero);
    tmp(m) = *this;
    return tmp.sum();
}//}}}
// exponent {{{1
namespace Detail
{
Vc_INTRINSIC Vc_CONST __m256 exponent(__m256 v)
{
    using namespace AVX;
    __m128i tmp0 = _mm_srli_epi32(avx_cast<__m128i>(v), 23);
    __m128i tmp1 = _mm_srli_epi32(avx_cast<__m128i>(hi128(v)), 23);
    tmp0 = _mm_sub_epi32(tmp0, _mm_set1_epi32(0x7f));
    tmp1 = _mm_sub_epi32(tmp1, _mm_set1_epi32(0x7f));
    return _mm256_cvtepi32_ps(concat(tmp0, tmp1));
}
Vc_INTRINSIC Vc_CONST __m256d exponent(__m256d v)
{
    using namespace AVX;
    __m128i tmp0 = _mm_srli_epi64(avx_cast<__m128i>(v), 52);
    __m128i tmp1 = _mm_srli_epi64(avx_cast<__m128i>(hi128(v)), 52);
    tmp0 = _mm_sub_epi32(tmp0, _mm_set1_epi32(0x3ff));
    tmp1 = _mm_sub_epi32(tmp1, _mm_set1_epi32(0x3ff));
    return _mm256_cvtepi32_pd(avx_cast<__m128i>(Mem::shuffle<X0, X2, Y0, Y2>(avx_cast<__m128>(tmp0), avx_cast<__m128>(tmp1))));
}
} // namespace Detail

Vc_INTRINSIC Vc_CONST AVX2::float_v exponent(AVX2::float_v x)
{
    using Detail::operator>=;
    Vc_ASSERT((x >= x.Zero()).isFull());
    return Detail::exponent(x.data());
}
Vc_INTRINSIC Vc_CONST AVX2::double_v exponent(AVX2::double_v x)
{
    using Detail::operator>=;
    Vc_ASSERT((x >= x.Zero()).isFull());
    return Detail::exponent(x.data());
}
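// Note added for clarity (not part of the original source): exponent() extracts
// the biased IEEE-754 exponent field (shift by 23 resp. 52 bits, subtract the
// bias 127 resp. 1023), i.e. floor(log2(x)) for normalized inputs, e.g.:
//     exponent(AVX2::float_v(8.f));    // {  3,  3, ... }
//     exponent(AVX2::float_v(0.25f));  // { -2, -2, ... }
// The Vc_ASSERT documents the precondition: inputs must be non-negative, and
// denormals and NaN are not handled meaningfully.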
// }}}1
// Random {{{1
static Vc_ALWAYS_INLINE __m256i _doRandomStep()
{
    using Detail::operator*;
    using Detail::operator+;
#ifdef Vc_IMPL_AVX2
    using AVX2::uint_v;
    uint_v state0(&Common::RandomState[0]);
    uint_v state1(&Common::RandomState[uint_v::Size]);
    (state1 * uint_v(0xdeece66du) + uint_v(11)).store(&Common::RandomState[uint_v::Size]);
    uint_v(Detail::xor_((state0 * uint_v(0xdeece66du) + uint_v(11)).data(),
                        _mm256_srli_epi32(state1.data(), 16)))
        .store(&Common::RandomState[0]);
    return state0.data();
#else
    using SSE::uint_v;
    uint_v state0(&Common::RandomState[0]);
    uint_v state1(&Common::RandomState[uint_v::Size]);
    uint_v state2(&Common::RandomState[2 * uint_v::Size]);
    uint_v state3(&Common::RandomState[3 * uint_v::Size]);
    (state2 * uint_v(0xdeece66du) + uint_v(11))
        .store(&Common::RandomState[2 * uint_v::Size]);
    (state3 * uint_v(0xdeece66du) + uint_v(11))
        .store(&Common::RandomState[3 * uint_v::Size]);
    uint_v(Detail::xor_((state0 * uint_v(0xdeece66du) + uint_v(11)).data(),
                        _mm_srli_epi32(state2.data(), 16)))
        .store(&Common::RandomState[0]);
    uint_v(Detail::xor_((state1 * uint_v(0xdeece66du) + uint_v(11)).data(),
                        _mm_srli_epi32(state3.data(), 16)))
        .store(&Common::RandomState[uint_v::Size]);
    return AVX::concat(state0.data(), state1.data());
#endif
}
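// Note added for clarity (not part of the original source): _doRandomStep()
// advances a vectorized linear congruential generator. The constants (multiplier
// 0xdeece66d, increment 11) are the POSIX rand48 constants truncated to 32 bits;
// XOR-ing in the high half of a second, independently advanced state block masks
// the weak low bits typical of power-of-two-modulus LCGs. The function returns
// the pre-update state.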
#ifdef Vc_IMPL_AVX2
template<typename T> Vc_ALWAYS_INLINE AVX2::Vector<T> Vector<T, VectorAbi::Avx>::Random()
{
    return {_doRandomStep()};
}
#endif

template <> Vc_ALWAYS_INLINE AVX2::float_v AVX2::float_v::Random()
{
    return HT::sub(Detail::or_(_cast(AVX::srli_epi32<2>(_doRandomStep())), HT::one()),
                   HT::one());
}
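// Note added for clarity (not part of the original source): float_v::Random()
// uses the mantissa trick: the random bits are shifted down into the 23-bit
// mantissa field, OR-ing with 1.0f yields a float uniformly distributed in
// [1, 2), and subtracting 1.0f maps it to [0, 1) without an int-to-float
// conversion. double_v::Random() below plays the same trick with the 52-bit
// mantissa, after advancing the state as a scalar 48-bit LCG (multiplier
// 0x5deece66d).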
template<> Vc_ALWAYS_INLINE AVX2::double_v AVX2::double_v::Random()
{
    const __m256i state = Detail::load(&Common::RandomState[0], Vc::Aligned,
                                       Detail::LoadTag<__m256i, int>());
    for (size_t k = 0; k < 8; k += 2) {
        typedef unsigned long long uint64 Vc_MAY_ALIAS;
        const uint64 stateX = *aliasing_cast<uint64>(&Common::RandomState[k]);
        *aliasing_cast<uint64>(&Common::RandomState[k]) = (stateX * 0x5deece66dull + 11);
    }
    return HT::sub(Detail::or_(_cast(AVX::srli_epi64<12>(state)), HT::one()), HT::one());
}
// }}}1
// shifted / rotated {{{1
template<typename T> Vc_INTRINSIC AVX2::Vector<T> Vector<T, VectorAbi::Avx>::shifted(int amount) const
{
    return Detail::shifted<EntryType>(d.v(), amount);
}

template <typename VectorType>
Vc_INTRINSIC Vc_CONST VectorType shifted_shortcut(VectorType left, VectorType right, Common::WidthT<__m128>)
{
    return Mem::shuffle<X2, X3, Y0, Y1>(left, right);
}
template <typename VectorType>
Vc_INTRINSIC Vc_CONST VectorType shifted_shortcut(VectorType left, VectorType right, Common::WidthT<__m256>)
{
    return Mem::shuffle128<X1, Y0>(left, right);
}

template<typename T> Vc_INTRINSIC AVX2::Vector<T> Vector<T, VectorAbi::Avx>::shifted(int amount, Vector shiftIn) const
{
#ifdef __GNUC__
    if (__builtin_constant_p(amount)) {
        const __m256i a = AVX::avx_cast<__m256i>(d.v());
        const __m256i b = AVX::avx_cast<__m256i>(shiftIn.d.v());
        if (amount * 2 == int(Size)) {
            return shifted_shortcut(d.v(), shiftIn.d.v(), WidthT());
        }
        if (amount * 2 == -int(Size)) {
            return shifted_shortcut(shiftIn.d.v(), d.v(), WidthT());
        }
        switch (amount) {
        case 1:
            return AVX::avx_cast<VectorType>(
#ifdef Vc_IMPL_AVX2
                _mm256_alignr_epi8(_mm256_permute2x128_si256(a, b, 0x21), a,
                                   sizeof(EntryType))
#else  // Vc_IMPL_AVX2
                AVX::concat(
                    _mm_alignr_epi8(AVX::hi128(a), AVX::lo128(a), sizeof(EntryType)),
                    _mm_alignr_epi8(AVX::lo128(b), AVX::hi128(a), sizeof(EntryType)))
#endif  // Vc_IMPL_AVX2
                );
        case 2:
            return AVX::avx_cast<VectorType>(
#ifdef Vc_IMPL_AVX2
                _mm256_alignr_epi8(_mm256_permute2x128_si256(a, b, 0x21), a,
                                   2 * sizeof(EntryType))
#else  // Vc_IMPL_AVX2
                AVX::concat(
                    _mm_alignr_epi8(AVX::hi128(a), AVX::lo128(a), 2 * sizeof(EntryType)),
                    _mm_alignr_epi8(AVX::lo128(b), AVX::hi128(a), 2 * sizeof(EntryType)))
#endif  // Vc_IMPL_AVX2
                );
        case 3:
            if (6u < Size) {
                return AVX::avx_cast<VectorType>(
#ifdef Vc_IMPL_AVX2
                    _mm256_alignr_epi8(_mm256_permute2x128_si256(a, b, 0x21), a,
                                       3 * sizeof(EntryType))
#else  // Vc_IMPL_AVX2
                    AVX::concat(_mm_alignr_epi8(AVX::hi128(a), AVX::lo128(a),
                                                3 * sizeof(EntryType)),
                                _mm_alignr_epi8(AVX::lo128(b), AVX::hi128(a),
                                                3 * sizeof(EntryType)))
#endif  // Vc_IMPL_AVX2
                    );
                // TODO: } else {
            }
        }
    }
#endif
    using Detail::operator|;
    return shifted(amount) | (amount > 0 ?
                              shiftIn.shifted(amount - Size) :
                              shiftIn.shifted(Size + amount));
}
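// Note added for clarity (not part of the original source): for a compile-time
// constant `amount` the branch above implements a funnel shift of the virtual
// concatenation (this, shiftIn): a lane-crossing permute lines the two registers
// up and alignr extracts the shifted window. The generic fallback in the last
// statement is correct because shifted() fills vacated lanes with zeros, so the
// two partial results can simply be OR-ed together.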
template<typename T> Vc_INTRINSIC AVX2::Vector<T> Vector<T, VectorAbi::Avx>::rotated(int amount) const
{
    return Detail::rotated<EntryType, size()>(d.v(), amount);
}
// sorted {{{1
template <typename T>
Vc_ALWAYS_INLINE Vc_PURE Vector<T, VectorAbi::Avx> Vector<T, VectorAbi::Avx>::sorted() const
{
    return Detail::sorted(*this);
}
// interleaveLow/-High {{{1
template <> Vc_INTRINSIC AVX2::double_v AVX2::double_v::interleaveLow(AVX2::double_v x) const
{
    return Mem::shuffle128<X0, Y0>(_mm256_unpacklo_pd(data(), x.data()),
                                   _mm256_unpackhi_pd(data(), x.data()));
}
template <> Vc_INTRINSIC AVX2::double_v AVX2::double_v::interleaveHigh(AVX2::double_v x) const
{
    return Mem::shuffle128<X1, Y1>(_mm256_unpacklo_pd(data(), x.data()),
                                   _mm256_unpackhi_pd(data(), x.data()));
}
template <> Vc_INTRINSIC AVX2::float_v AVX2::float_v::interleaveLow(AVX2::float_v x) const
{
    return Mem::shuffle128<X0, Y0>(_mm256_unpacklo_ps(data(), x.data()),
                                   _mm256_unpackhi_ps(data(), x.data()));
}
template <> Vc_INTRINSIC AVX2::float_v AVX2::float_v::interleaveHigh(AVX2::float_v x) const
{
    return Mem::shuffle128<X1, Y1>(_mm256_unpacklo_ps(data(), x.data()),
                                   _mm256_unpackhi_ps(data(), x.data()));
}
#ifdef Vc_IMPL_AVX2
template <> Vc_INTRINSIC AVX2::int_v AVX2::int_v::interleaveLow ( AVX2::int_v x) const {
    return Mem::shuffle128<X0, Y0>(_mm256_unpacklo_epi32(data(), x.data()),
                                   _mm256_unpackhi_epi32(data(), x.data()));
}
template <> Vc_INTRINSIC AVX2::int_v AVX2::int_v::interleaveHigh( AVX2::int_v x) const {
    return Mem::shuffle128<X1, Y1>(_mm256_unpacklo_epi32(data(), x.data()),
                                   _mm256_unpackhi_epi32(data(), x.data()));
}
template <> Vc_INTRINSIC AVX2::uint_v AVX2::uint_v::interleaveLow ( AVX2::uint_v x) const {
    return Mem::shuffle128<X0, Y0>(_mm256_unpacklo_epi32(data(), x.data()),
                                   _mm256_unpackhi_epi32(data(), x.data()));
}
template <> Vc_INTRINSIC AVX2::uint_v AVX2::uint_v::interleaveHigh( AVX2::uint_v x) const {
    return Mem::shuffle128<X1, Y1>(_mm256_unpacklo_epi32(data(), x.data()),
                                   _mm256_unpackhi_epi32(data(), x.data()));
}
template <> Vc_INTRINSIC AVX2::short_v AVX2::short_v::interleaveLow ( AVX2::short_v x) const {
    return Mem::shuffle128<X0, Y0>(_mm256_unpacklo_epi16(data(), x.data()),
                                   _mm256_unpackhi_epi16(data(), x.data()));
}
template <> Vc_INTRINSIC AVX2::short_v AVX2::short_v::interleaveHigh( AVX2::short_v x) const {
    return Mem::shuffle128<X1, Y1>(_mm256_unpacklo_epi16(data(), x.data()),
                                   _mm256_unpackhi_epi16(data(), x.data()));
}
template <> Vc_INTRINSIC AVX2::ushort_v AVX2::ushort_v::interleaveLow (AVX2::ushort_v x) const {
    return Mem::shuffle128<X0, Y0>(_mm256_unpacklo_epi16(data(), x.data()),
                                   _mm256_unpackhi_epi16(data(), x.data()));
}
template <> Vc_INTRINSIC AVX2::ushort_v AVX2::ushort_v::interleaveHigh(AVX2::ushort_v x) const {
    return Mem::shuffle128<X1, Y1>(_mm256_unpacklo_epi16(data(), x.data()),
                                   _mm256_unpackhi_epi16(data(), x.data()));
}
#endif
// permutation via operator[] {{{1
template <> Vc_INTRINSIC Vc_PURE AVX2::double_v AVX2::double_v::operator[](Permutation::ReversedTag) const
{
    return Mem::permute128<X1, X0>(Mem::permute<X1, X0, X3, X2>(d.v()));
}
template <> Vc_INTRINSIC Vc_PURE AVX2::float_v AVX2::float_v::operator[](Permutation::ReversedTag) const
{
    return Mem::permute128<X1, X0>(Mem::permute<X3, X2, X1, X0>(d.v()));
}
#ifdef Vc_IMPL_AVX2
template <>
Vc_INTRINSIC Vc_PURE AVX2::int_v AVX2::int_v::operator[](Permutation::ReversedTag) const
{
    return Mem::permute128<X1, X0>(Mem::permute<X3, X2, X1, X0>(d.v()));
}
template <>
Vc_INTRINSIC Vc_PURE AVX2::uint_v AVX2::uint_v::operator[](Permutation::ReversedTag) const
{
    return Mem::permute128<X1, X0>(Mem::permute<X3, X2, X1, X0>(d.v()));
}
template <>
Vc_INTRINSIC Vc_PURE AVX2::short_v AVX2::short_v::operator[](
    Permutation::ReversedTag) const
{
    return Mem::permute128<X1, X0>(AVX::avx_cast<__m256i>(Mem::shuffle<X1, Y0, X3, Y2>(
        AVX::avx_cast<__m256d>(Mem::permuteHi<X7, X6, X5, X4>(d.v())),
        AVX::avx_cast<__m256d>(Mem::permuteLo<X3, X2, X1, X0>(d.v())))));
}
template <>
Vc_INTRINSIC Vc_PURE AVX2::ushort_v AVX2::ushort_v::operator[](
    Permutation::ReversedTag) const
{
    return Mem::permute128<X1, X0>(AVX::avx_cast<__m256i>(Mem::shuffle<X1, Y0, X3, Y2>(
        AVX::avx_cast<__m256d>(Mem::permuteHi<X7, X6, X5, X4>(d.v())),
        AVX::avx_cast<__m256d>(Mem::permuteLo<X3, X2, X1, X0>(d.v())))));
}
#endif
template <> Vc_INTRINSIC AVX2::float_v Vector<float, VectorAbi::Avx>::operator[](const IndexType &/*perm*/) const
{
    // TODO
    return *this;
#ifdef Vc_IMPL_AVX2
#else
    /*
    const int_m cross128 = AVX::concat(_mm_cmpgt_epi32(AVX::lo128(perm.data()), _mm_set1_epi32(3)),
                                       _mm_cmplt_epi32(AVX::hi128(perm.data()), _mm_set1_epi32(4)));
    if (cross128.isNotEmpty()) {
        AVX2::float_v x = _mm256_permutevar_ps(d.v(), perm.data());
        x(cross128) = _mm256_permutevar_ps(Mem::permute128<X1, X0>(d.v()), perm.data());
        return x;
    } else {
    */
#endif
}

// reversed {{{1
template <typename T>
Vc_INTRINSIC Vc_PURE Vector<T, VectorAbi::Avx> Vector<T, VectorAbi::Avx>::reversed() const
{
    return (*this)[Permutation::Reversed];
}

// broadcast from constexpr index {{{1
template <> template <int Index> Vc_INTRINSIC AVX2::float_v AVX2::float_v::broadcast() const
{
    constexpr VecPos Inner = static_cast<VecPos>(Index & 0x3);
    constexpr VecPos Outer = static_cast<VecPos>((Index & 0x4) / 4);
    return Mem::permute<Inner, Inner, Inner, Inner>(Mem::permute128<Outer, Outer>(d.v()));
}
template <> template <int Index> Vc_INTRINSIC AVX2::double_v AVX2::double_v::broadcast() const
{
    constexpr VecPos Inner = static_cast<VecPos>(Index & 0x1);
    constexpr VecPos Outer = static_cast<VecPos>((Index & 0x2) / 2);
    return Mem::permute<Inner, Inner>(Mem::permute128<Outer, Outer>(d.v()));
}
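// Note added for clarity (not part of the original source): broadcast<Index>()
// splits the constexpr Index into a 128-bit half selector (Outer) and a position
// inside that half (Inner), so replicating one lane costs exactly one cross-lane
// permute followed by one in-lane permute.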
// }}}1
}  // namespace Vc

// vim: foldmethod=marker
@@ -0,0 +1,257 @@
/* This file is part of the Vc library. {{{
Copyright © 2009-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_AVX_VECTORHELPER_H_
#define VC_AVX_VECTORHELPER_H_

#include <limits>
#include "types.h"
#include "intrinsics.h"
#include "casts.h"
#include "../common/loadstoreflags.h"
#include "macros.h"

namespace Vc_VERSIONED_NAMESPACE
{
namespace AVX
{
template<> struct VectorHelper<__m256>
{
    typedef __m256 VectorType;
    typedef const VectorType VTArg;

    template<typename Flags> static Vc_ALWAYS_INLINE void store(float *mem, VTArg x, typename Flags::EnableIfAligned = nullptr) { _mm256_store_ps(mem, x); }
    template<typename Flags> static Vc_ALWAYS_INLINE void store(float *mem, VTArg x, typename Flags::EnableIfUnalignedNotStreaming = nullptr) { _mm256_storeu_ps(mem, x); }
    template<typename Flags> static Vc_ALWAYS_INLINE void store(float *mem, VTArg x, typename Flags::EnableIfStreaming = nullptr) { _mm256_stream_ps(mem, x); }
    template<typename Flags> static Vc_ALWAYS_INLINE void store(float *mem, VTArg x, typename Flags::EnableIfUnalignedAndStreaming = nullptr) { AvxIntrinsics::stream_store(mem, x, setallone_ps()); }

    template<typename Flags> static Vc_ALWAYS_INLINE void store(float *mem, VTArg x, VTArg m, typename std::enable_if<!Flags::IsStreaming, void *>::type = nullptr) { _mm256_maskstore(mem, m, x); }
    template<typename Flags> static Vc_ALWAYS_INLINE void store(float *mem, VTArg x, VTArg m, typename std::enable_if< Flags::IsStreaming, void *>::type = nullptr) { AvxIntrinsics::stream_store(mem, x, m); }
};

template<> struct VectorHelper<__m256d>
{
    typedef __m256d VectorType;
    typedef const VectorType VTArg;

    template<typename Flags> static Vc_ALWAYS_INLINE void store(double *mem, VTArg x, typename Flags::EnableIfAligned = nullptr) { _mm256_store_pd(mem, x); }
    template<typename Flags> static Vc_ALWAYS_INLINE void store(double *mem, VTArg x, typename Flags::EnableIfUnalignedNotStreaming = nullptr) { _mm256_storeu_pd(mem, x); }
    template<typename Flags> static Vc_ALWAYS_INLINE void store(double *mem, VTArg x, typename Flags::EnableIfStreaming = nullptr) { _mm256_stream_pd(mem, x); }
    template<typename Flags> static Vc_ALWAYS_INLINE void store(double *mem, VTArg x, typename Flags::EnableIfUnalignedAndStreaming = nullptr) { AvxIntrinsics::stream_store(mem, x, setallone_pd()); }

    template<typename Flags> static Vc_ALWAYS_INLINE void store(double *mem, VTArg x, VTArg m, typename std::enable_if<!Flags::IsStreaming, void *>::type = nullptr) { _mm256_maskstore(mem, m, x); }
    template<typename Flags> static Vc_ALWAYS_INLINE void store(double *mem, VTArg x, VTArg m, typename std::enable_if< Flags::IsStreaming, void *>::type = nullptr) { AvxIntrinsics::stream_store(mem, x, m); }
};

template<> struct VectorHelper<__m256i>
{
    typedef __m256i VectorType;
    typedef const VectorType VTArg;

    template<typename Flags, typename T> static Vc_ALWAYS_INLINE void store(T *mem, VTArg x, typename Flags::EnableIfAligned = nullptr) { _mm256_store_si256(reinterpret_cast<__m256i *>(mem), x); }
    template<typename Flags, typename T> static Vc_ALWAYS_INLINE void store(T *mem, VTArg x, typename Flags::EnableIfUnalignedNotStreaming = nullptr) { _mm256_storeu_si256(reinterpret_cast<__m256i *>(mem), x); }
    template<typename Flags, typename T> static Vc_ALWAYS_INLINE void store(T *mem, VTArg x, typename Flags::EnableIfStreaming = nullptr) { _mm256_stream_si256(reinterpret_cast<__m256i *>(mem), x); }
    template<typename Flags, typename T> static Vc_ALWAYS_INLINE void store(T *mem, VTArg x, typename Flags::EnableIfUnalignedAndStreaming = nullptr) { AvxIntrinsics::stream_store(mem, x, setallone_si256()); }

    template<typename Flags, typename T> static Vc_ALWAYS_INLINE void store(T *mem, VTArg x, VTArg m, typename std::enable_if<!Flags::IsStreaming, void *>::type = nullptr) { _mm256_maskstore(mem, m, x); }
    template<typename Flags, typename T> static Vc_ALWAYS_INLINE void store(T *mem, VTArg x, VTArg m, typename std::enable_if< Flags::IsStreaming, void *>::type = nullptr) { AvxIntrinsics::stream_store(mem, x, m); }
};

#define Vc_OP1(op) \
        static Vc_INTRINSIC VectorType Vc_CONST op(VTArg a) { return Vc_CAT2(_mm256_##op##_, Vc_SUFFIX)(a); }
#define Vc_OP(op) \
        static Vc_INTRINSIC VectorType Vc_CONST op(VTArg a, VTArg b) { return Vc_CAT2(op##_ , Vc_SUFFIX)(a, b); }
#define Vc_OP_(op) \
        static Vc_INTRINSIC VectorType Vc_CONST op(VTArg a, VTArg b) { return Vc_CAT2(_mm256_##op , Vc_SUFFIX)(a, b); }
#define Vc_OPx(op, op2) \
        static Vc_INTRINSIC VectorType Vc_CONST op(VTArg a, VTArg b) { return Vc_CAT2(_mm256_##op2##_, Vc_SUFFIX)(a, b); }

template<> struct VectorHelper<double> {
    typedef __m256d VectorType;
    typedef const VectorType VTArg;
    typedef double EntryType;
#define Vc_SUFFIX pd

    static Vc_ALWAYS_INLINE VectorType notMaskedToZero(VTArg a, __m256 mask) { return Vc_CAT2(_mm256_and_, Vc_SUFFIX)(_mm256_castps_pd(mask), a); }
    static Vc_ALWAYS_INLINE VectorType set(const double a) { return Vc_CAT2(_mm256_set1_, Vc_SUFFIX)(a); }
    static Vc_ALWAYS_INLINE VectorType set(const double a, const double b, const double c, const double d) {
        return Vc_CAT2(_mm256_set_, Vc_SUFFIX)(a, b, c, d);
    }
    static Vc_ALWAYS_INLINE VectorType zero() { return Vc_CAT2(_mm256_setzero_, Vc_SUFFIX)(); }
    static Vc_ALWAYS_INLINE VectorType one()  { return Vc_CAT2(setone_, Vc_SUFFIX)(); }// set(1.); }

    static inline void fma(VectorType &v1, VTArg v2, VTArg v3) {
#ifdef Vc_IMPL_FMA4
        v1 = _mm256_macc_pd(v1, v2, v3);
#else
        VectorType h1 = _mm256_and_pd(v1, _mm256_broadcast_sd(reinterpret_cast<const double *>(&c_general::highMaskDouble)));
        VectorType h2 = _mm256_and_pd(v2, _mm256_broadcast_sd(reinterpret_cast<const double *>(&c_general::highMaskDouble)));
#if defined(Vc_GCC) && Vc_GCC < 0x40703
        // GCC before 4.7.3 uses an incorrect optimization where it replaces the subtraction with an andnot
        // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=54703
        asm("":"+x"(h1), "+x"(h2));
#endif
        const VectorType l1 = _mm256_sub_pd(v1, h1);
        const VectorType l2 = _mm256_sub_pd(v2, h2);
        const VectorType ll = mul(l1, l2);
        const VectorType lh = add(mul(l1, h2), mul(h1, l2));
        const VectorType hh = mul(h1, h2);
        // ll < lh < hh for all entries is certain
        const VectorType lh_lt_v3 = cmplt_pd(abs(lh), abs(v3));  // |lh| < |v3|
        const VectorType b = _mm256_blendv_pd(v3, lh, lh_lt_v3);
        const VectorType c = _mm256_blendv_pd(lh, v3, lh_lt_v3);
        v1 = add(add(ll, b), add(c, hh));
#endif
    }
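    // Note added for clarity (not part of the original source): the non-FMA4
    // path above emulates a fused multiply-add with extra precision by splitting
    // each factor into high and low halves (the highMaskDouble mask drops low
    // mantissa bits, a Dekker/Veltkamp-style split), so v1*v2 decomposes into
    // the partial products hh + lh + ll; v3 is then summed in magnitude order
    // (the two blends) to keep the emulation's additional rounding error small.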

    static Vc_INTRINSIC VectorType Vc_CONST add(VTArg a, VTArg b) { return _mm256_add_pd(a,b); }
    static Vc_INTRINSIC VectorType Vc_CONST sub(VTArg a, VTArg b) { return _mm256_sub_pd(a,b); }
    static Vc_INTRINSIC VectorType Vc_CONST mul(VTArg a, VTArg b) { return _mm256_mul_pd(a,b); }

    Vc_OP1(sqrt)
    static Vc_ALWAYS_INLINE Vc_CONST VectorType rsqrt(VTArg x) {
        return _mm256_div_pd(one(), sqrt(x));
    }
    static Vc_ALWAYS_INLINE Vc_CONST VectorType reciprocal(VTArg x) {
        return _mm256_div_pd(one(), x);
    }
    static Vc_ALWAYS_INLINE Vc_CONST VectorType abs(VTArg a) {
        return Vc_CAT2(_mm256_and_, Vc_SUFFIX)(a, setabsmask_pd());
    }

    static Vc_INTRINSIC VectorType Vc_CONST min(VTArg a, VTArg b) { return _mm256_min_pd(a, b); }
    static Vc_INTRINSIC VectorType Vc_CONST max(VTArg a, VTArg b) { return _mm256_max_pd(a, b); }
    static Vc_ALWAYS_INLINE Vc_CONST EntryType min(VTArg a) {
        __m128d b = _mm_min_pd(avx_cast<__m128d>(a), _mm256_extractf128_pd(a, 1));
        b = _mm_min_sd(b, _mm_unpackhi_pd(b, b));
        return _mm_cvtsd_f64(b);
    }
    static Vc_ALWAYS_INLINE Vc_CONST EntryType max(VTArg a) {
        __m128d b = _mm_max_pd(avx_cast<__m128d>(a), _mm256_extractf128_pd(a, 1));
        b = _mm_max_sd(b, _mm_unpackhi_pd(b, b));
        return _mm_cvtsd_f64(b);
    }
    static Vc_ALWAYS_INLINE Vc_CONST EntryType mul(VTArg a) {
        __m128d b = _mm_mul_pd(avx_cast<__m128d>(a), _mm256_extractf128_pd(a, 1));
        b = _mm_mul_sd(b, _mm_shuffle_pd(b, b, _MM_SHUFFLE2(0, 1)));
        return _mm_cvtsd_f64(b);
    }
    static Vc_ALWAYS_INLINE Vc_CONST EntryType add(VTArg a) {
        __m128d b = _mm_add_pd(avx_cast<__m128d>(a), _mm256_extractf128_pd(a, 1));
        b = _mm_hadd_pd(b, b); // or: b = _mm_add_sd(b, _mm256_shuffle_pd(b, b, _MM_SHUFFLE2(0, 1)));
        return _mm_cvtsd_f64(b);
    }
#undef Vc_SUFFIX
    static Vc_ALWAYS_INLINE Vc_CONST VectorType round(VTArg a) {
        return _mm256_round_pd(a, _MM_FROUND_NINT);
    }
};

template<> struct VectorHelper<float> {
    typedef float EntryType;
    typedef __m256 VectorType;
    typedef const VectorType VTArg;
#define Vc_SUFFIX ps

    static Vc_ALWAYS_INLINE Vc_CONST VectorType notMaskedToZero(VTArg a, __m256 mask) { return Vc_CAT2(_mm256_and_, Vc_SUFFIX)(mask, a); }
    static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const float a) { return Vc_CAT2(_mm256_set1_, Vc_SUFFIX)(a); }
    static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const float a, const float b, const float c, const float d,
                                                    const float e, const float f, const float g, const float h) {
        return Vc_CAT2(_mm256_set_, Vc_SUFFIX)(a, b, c, d, e, f, g, h); }
    static Vc_ALWAYS_INLINE Vc_CONST VectorType zero() { return Vc_CAT2(_mm256_setzero_, Vc_SUFFIX)(); }
    static Vc_ALWAYS_INLINE Vc_CONST VectorType one()  { return Vc_CAT2(setone_, Vc_SUFFIX)(); }// set(1.f); }
    static Vc_ALWAYS_INLINE Vc_CONST __m256 concat(__m256d a, __m256d b) { return _mm256_insertf128_ps(avx_cast<__m256>(_mm256_cvtpd_ps(a)), _mm256_cvtpd_ps(b), 1); }

    static inline void fma(VectorType &v1, VTArg v2, VTArg v3) {
#ifdef Vc_IMPL_FMA4
        v1 = _mm256_macc_ps(v1, v2, v3);
#else
        __m256d v1_0 = _mm256_cvtps_pd(lo128(v1));
        __m256d v1_1 = _mm256_cvtps_pd(hi128(v1));
        __m256d v2_0 = _mm256_cvtps_pd(lo128(v2));
        __m256d v2_1 = _mm256_cvtps_pd(hi128(v2));
        __m256d v3_0 = _mm256_cvtps_pd(lo128(v3));
        __m256d v3_1 = _mm256_cvtps_pd(hi128(v3));
        v1 = AVX::concat(
            _mm256_cvtpd_ps(_mm256_add_pd(_mm256_mul_pd(v1_0, v2_0), v3_0)),
            _mm256_cvtpd_ps(_mm256_add_pd(_mm256_mul_pd(v1_1, v2_1), v3_1)));
#endif
    }
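    // Note added for clarity (not part of the original source): the float
    // fallback above widens each 128-bit half to double, where the product of
    // two floats is exact, performs the add in double, and narrows back. This
    // avoids most of the rounding error a plain float mul+add would accumulate,
    // at the cost of six widening and two narrowing conversions.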

    static Vc_INTRINSIC VectorType Vc_CONST add(VTArg a, VTArg b) { return _mm256_add_ps(a, b); }
    static Vc_INTRINSIC VectorType Vc_CONST sub(VTArg a, VTArg b) { return _mm256_sub_ps(a, b); }
    static Vc_INTRINSIC VectorType Vc_CONST mul(VTArg a, VTArg b) { return _mm256_mul_ps(a, b); }

    Vc_OP1(sqrt) Vc_OP1(rsqrt)
    static Vc_ALWAYS_INLINE Vc_CONST VectorType reciprocal(VTArg x) {
        return _mm256_rcp_ps(x);
    }
    static Vc_ALWAYS_INLINE Vc_CONST VectorType abs(VTArg a) {
        return Vc_CAT2(_mm256_and_, Vc_SUFFIX)(a, setabsmask_ps());
    }

    static Vc_INTRINSIC VectorType Vc_CONST min(VTArg a, VTArg b) { return _mm256_min_ps(a, b); }
    static Vc_INTRINSIC VectorType Vc_CONST max(VTArg a, VTArg b) { return _mm256_max_ps(a, b); }
    static Vc_ALWAYS_INLINE Vc_CONST EntryType min(VTArg a) {
        __m128 b = _mm_min_ps(lo128(a), hi128(a));
        b = _mm_min_ps(b, _mm_movehl_ps(b, b));   // b = min(a0, a2), min(a1, a3), min(a2, a2), min(a3, a3)
        b = _mm_min_ss(b, _mm_shuffle_ps(b, b, _MM_SHUFFLE(1, 1, 1, 1))); // b = min(a0, a1), a1, a2, a3
        return _mm_cvtss_f32(b);
    }
    static Vc_ALWAYS_INLINE Vc_CONST EntryType max(VTArg a) {
        __m128 b = _mm_max_ps(avx_cast<__m128>(a), _mm256_extractf128_ps(a, 1));
        b = _mm_max_ps(b, _mm_movehl_ps(b, b));   // b = max(a0, a2), max(a1, a3), max(a2, a2), max(a3, a3)
        b = _mm_max_ss(b, _mm_shuffle_ps(b, b, _MM_SHUFFLE(1, 1, 1, 1))); // b = max(a0, a1), a1, a2, a3
        return _mm_cvtss_f32(b);
    }
    static Vc_ALWAYS_INLINE Vc_CONST EntryType mul(VTArg a) {
        __m128 b = _mm_mul_ps(avx_cast<__m128>(a), _mm256_extractf128_ps(a, 1));
        b = _mm_mul_ps(b, _mm_shuffle_ps(b, b, _MM_SHUFFLE(0, 1, 2, 3)));
        b = _mm_mul_ss(b, _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 2, 0, 1)));
        return _mm_cvtss_f32(b);
    }
    static Vc_ALWAYS_INLINE Vc_CONST EntryType add(VTArg a) {
        __m128 b = _mm_add_ps(avx_cast<__m128>(a), _mm256_extractf128_ps(a, 1));
        b = _mm_add_ps(b, _mm_shuffle_ps(b, b, _MM_SHUFFLE(0, 1, 2, 3)));
        b = _mm_add_ss(b, _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 2, 0, 1)));
        return _mm_cvtss_f32(b);
    }
#undef Vc_SUFFIX
    static Vc_ALWAYS_INLINE Vc_CONST VectorType round(VTArg a) {
        return _mm256_round_ps(a, _MM_FROUND_NINT);
    }
};

#undef Vc_OP1
#undef Vc_OP
#undef Vc_OP_
#undef Vc_OPx

}  // namespace AVX(2)
}  // namespace Vc

#endif // VC_AVX_VECTORHELPER_H_
@@ -0,0 +1,166 @@
/* This file is part of the Vc library. {{{
Copyright © 2013-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_COMMON_ALGORITHMS_H_
#define VC_COMMON_ALGORITHMS_H_

#include "simdize.h"

namespace Vc_VERSIONED_NAMESPACE
{
#ifdef DOXYGEN
/**
 * \ingroup Utilities
 * \headerfile algorithms.h <Vc/Vc>
 *
 * Vc variant of the `std::for_each` algorithm.
 *
 * This algorithm calls \p f with one argument of type
 * `Vc::Vector<` *iterator value type* `, ` *unspecified* `>` as often as is needed to
 * iterate over the complete range from \p first to \p last.
 * It will try to use the best vector size (VectorAbi) to work on the largest chunks
 * possible.
 * To support aligned loads (and stores) and to support arbitrary range distances, the
 * algorithm may require the use of `Vc::VectorAbi` types that work on fewer elements in
 * parallel.
 *
 * The following example requires C++14 for generic lambdas. If you don't have generic
 * lambdas available you can use a "classic" functor type with a templated call operator
 * instead.
 *
 * \code
 * void scale(std::vector<double> &data, double factor) {
 *   Vc::simd_for_each(data.begin(), data.end(), [&](auto v) {
 *     v *= factor;
 *   });
 * }
 * \endcode
 */
template <class InputIt, class UnaryFunction>
UnaryFunction simd_for_each(InputIt first, InputIt last, UnaryFunction f);
#else
template <class InputIt, class UnaryFunction,
          class ValueType = typename std::iterator_traits<InputIt>::value_type>
inline enable_if<
    Traits::is_functor_argument_immutable<UnaryFunction, simdize<ValueType>>::value,
    UnaryFunction>
simd_for_each(InputIt first, InputIt last, UnaryFunction f)
{
    typedef simdize<ValueType> V;
    typedef simdize<ValueType, 1> V1;
    const auto lastV = last - V::Size + 1;
    for (; first < lastV; first += V::Size) {
        V tmp;
        load_interleaved(tmp, std::addressof(*first));
        f(tmp);
    }
    for (; first != last; ++first) {
        V1 tmp;
        load_interleaved(tmp, std::addressof(*first));
        f(tmp);
    }
    return f;
}
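// Note added for clarity (not part of the original source): the enable_if
// conditions select between two overloads at compile time. The overload above is
// chosen when the functor cannot modify its argument and therefore only loads
// each chunk; the overload below additionally writes the chunk back with
// store_interleaved(), which is what makes a mutating functor (like the
// `v *= factor` example in the documentation comment) visible in the range.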

template <typename InputIt, typename UnaryFunction,
          class ValueType = typename std::iterator_traits<InputIt>::value_type>
inline enable_if<
    !Traits::is_functor_argument_immutable<UnaryFunction, simdize<ValueType>>::value,
    UnaryFunction>
simd_for_each(InputIt first, InputIt last, UnaryFunction f)
{
    typedef simdize<ValueType> V;
    typedef simdize<ValueType, 1> V1;
    const auto lastV = last - V::size() + 1;
    for (; first < lastV; first += V::size()) {
        V tmp;
        load_interleaved(tmp, std::addressof(*first));
        f(tmp);
        store_interleaved(tmp, std::addressof(*first));
    }
    for (; first != last; ++first) {
        V1 tmp;
        load_interleaved(tmp, std::addressof(*first));
        f(tmp);
        store_interleaved(tmp, std::addressof(*first));
    }
    return f;
}
#endif

///////////////////////////////////////////////////////////////////////////////
template <typename InputIt, typename UnaryFunction,
          class ValueType = typename std::iterator_traits<InputIt>::value_type>
inline enable_if<
    Traits::is_functor_argument_immutable<UnaryFunction, simdize<ValueType>>::value,
    UnaryFunction>
simd_for_each_n(InputIt first, std::size_t count, UnaryFunction f)
{
    typename std::make_signed<size_t>::type len = count;
    typedef simdize<ValueType> V;
    typedef simdize<ValueType, 1> V1;
    for (; len >= int(V::size()); len -= V::Size, first += V::Size) {
        V tmp;
        load_interleaved(tmp, std::addressof(*first));
        f(tmp);
    }
    for (; len != 0; --len, ++first) {
        V1 tmp;
        load_interleaved(tmp, std::addressof(*first));
        f(tmp);
    }
    return f;
}

template <typename InputIt, typename UnaryFunction,
          class ValueType = typename std::iterator_traits<InputIt>::value_type>
inline enable_if<
    !Traits::is_functor_argument_immutable<UnaryFunction, simdize<ValueType>>::value,
    UnaryFunction>
simd_for_each_n(InputIt first, std::size_t count, UnaryFunction f)
{
    typename std::make_signed<size_t>::type len = count;
    typedef simdize<ValueType> V;
    typedef simdize<ValueType, 1> V1;
    for (; len >= int(V::size()); len -= V::Size, first += V::Size) {
        V tmp;
        load_interleaved(tmp, std::addressof(*first));
        f(tmp);
        store_interleaved(tmp, std::addressof(*first));
    }
    for (; len != 0; --len, ++first) {
        V1 tmp;
        load_interleaved(tmp, std::addressof(*first));
        f(tmp);
        store_interleaved(tmp, std::addressof(*first));
    }
    return f;
}
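// Note added for clarity (not part of the original source): both simd_for_each_n
// overloads mirror simd_for_each: full V::Size chunks are processed vectorized,
// then the remaining elements run through the width-1 simdize type V1 one at a
// time. `len` is a signed copy of `count`, which keeps the comparison against
// int(V::size()) in the chunk loop free of signed/unsigned mismatch.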

}  // namespace Vc

#endif // VC_COMMON_ALGORITHMS_H_
@@ -0,0 +1,121 @@
/* This file is part of the Vc library. {{{
Copyright © 2010-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_COMMON_ALIASINGENTRYHELPER_H_
#define VC_COMMON_ALIASINGENTRYHELPER_H_

#include "macros.h"

namespace Vc_VERSIONED_NAMESPACE
{
namespace Common
{

template<class StorageType> class AliasingEntryHelper
{
private:
    typedef typename StorageType::EntryType T;
#ifdef Vc_ICC
    StorageType *const m_storage;
    const int m_index;
public:
    Vc_ALWAYS_INLINE AliasingEntryHelper(StorageType *d, int index) : m_storage(d), m_index(index) {}
    Vc_ALWAYS_INLINE AliasingEntryHelper(const AliasingEntryHelper &) = default;
    Vc_ALWAYS_INLINE AliasingEntryHelper(AliasingEntryHelper &&) = default;
    Vc_ALWAYS_INLINE AliasingEntryHelper &operator=(const AliasingEntryHelper &rhs) {
        m_storage->assign(m_index, rhs);
        return *this;
    }

    Vc_ALWAYS_INLINE AliasingEntryHelper &operator  =(T x) { m_storage->assign(m_index, x); return *this; }
    Vc_ALWAYS_INLINE AliasingEntryHelper &operator +=(T x) { m_storage->assign(m_index, m_storage->m(m_index) + x); return *this; }
    Vc_ALWAYS_INLINE AliasingEntryHelper &operator -=(T x) { m_storage->assign(m_index, m_storage->m(m_index) - x); return *this; }
    Vc_ALWAYS_INLINE AliasingEntryHelper &operator /=(T x) { m_storage->assign(m_index, m_storage->m(m_index) / x); return *this; }
    Vc_ALWAYS_INLINE AliasingEntryHelper &operator *=(T x) { m_storage->assign(m_index, m_storage->m(m_index) * x); return *this; }
    Vc_ALWAYS_INLINE AliasingEntryHelper &operator |=(T x) { m_storage->assign(m_index, m_storage->m(m_index) | x); return *this; }
    Vc_ALWAYS_INLINE AliasingEntryHelper &operator &=(T x) { m_storage->assign(m_index, m_storage->m(m_index) & x); return *this; }
    Vc_ALWAYS_INLINE AliasingEntryHelper &operator ^=(T x) { m_storage->assign(m_index, m_storage->m(m_index) ^ x); return *this; }
    Vc_ALWAYS_INLINE AliasingEntryHelper &operator %=(T x) { m_storage->assign(m_index, m_storage->m(m_index) % x); return *this; }
    Vc_ALWAYS_INLINE AliasingEntryHelper &operator<<=(T x) { m_storage->assign(m_index, m_storage->m(m_index)<< x); return *this; }
    Vc_ALWAYS_INLINE AliasingEntryHelper &operator>>=(T x) { m_storage->assign(m_index, m_storage->m(m_index)>> x); return *this; }
#define m_data m_storage->read(m_index)
#else
    typedef T A Vc_MAY_ALIAS;
    A &m_data;
public:
    template<typename T2>
    Vc_ALWAYS_INLINE AliasingEntryHelper(T2 &d) : m_data(reinterpret_cast<A &>(d)) {}

    Vc_ALWAYS_INLINE AliasingEntryHelper(A &d) : m_data(d) {}
    Vc_ALWAYS_INLINE AliasingEntryHelper &operator=(const AliasingEntryHelper &rhs) {
        m_data = rhs.m_data;
        return *this;
    }

    Vc_ALWAYS_INLINE AliasingEntryHelper &operator =(T x) { m_data  = x; return *this; }
    Vc_ALWAYS_INLINE AliasingEntryHelper &operator+=(T x) { m_data += x; return *this; }
    Vc_ALWAYS_INLINE AliasingEntryHelper &operator-=(T x) { m_data -= x; return *this; }
    Vc_ALWAYS_INLINE AliasingEntryHelper &operator/=(T x) { m_data /= x; return *this; }
    Vc_ALWAYS_INLINE AliasingEntryHelper &operator*=(T x) { m_data *= x; return *this; }
    Vc_ALWAYS_INLINE AliasingEntryHelper &operator|=(T x) { m_data |= x; return *this; }
    Vc_ALWAYS_INLINE AliasingEntryHelper &operator&=(T x) { m_data &= x; return *this; }
    Vc_ALWAYS_INLINE AliasingEntryHelper &operator^=(T x) { m_data ^= x; return *this; }
    Vc_ALWAYS_INLINE AliasingEntryHelper &operator%=(T x) { m_data %= x; return *this; }
    Vc_ALWAYS_INLINE AliasingEntryHelper &operator<<=(T x) { m_data <<= x; return *this; }
    Vc_ALWAYS_INLINE AliasingEntryHelper &operator>>=(T x) { m_data >>= x; return *this; }
#endif

    Vc_ALWAYS_INLINE Vc_PURE operator const T() const { return m_data; }

    Vc_ALWAYS_INLINE Vc_PURE bool operator==(T x) const { return static_cast<T>(m_data) == x; }
    Vc_ALWAYS_INLINE Vc_PURE bool operator!=(T x) const { return static_cast<T>(m_data) != x; }
    Vc_ALWAYS_INLINE Vc_PURE bool operator<=(T x) const { return static_cast<T>(m_data) <= x; }
    Vc_ALWAYS_INLINE Vc_PURE bool operator>=(T x) const { return static_cast<T>(m_data) >= x; }
    Vc_ALWAYS_INLINE Vc_PURE bool operator< (T x) const { return static_cast<T>(m_data) <  x; }
    Vc_ALWAYS_INLINE Vc_PURE bool operator> (T x) const { return static_cast<T>(m_data) >  x; }

    Vc_ALWAYS_INLINE Vc_PURE T operator-() const { return -static_cast<T>(m_data); }
    Vc_ALWAYS_INLINE Vc_PURE T operator~() const { return ~static_cast<T>(m_data); }
    Vc_ALWAYS_INLINE Vc_PURE T operator+(T x) const { return static_cast<T>(m_data) + x; }
    Vc_ALWAYS_INLINE Vc_PURE T operator-(T x) const { return static_cast<T>(m_data) - x; }
    Vc_ALWAYS_INLINE Vc_PURE T operator/(T x) const { return static_cast<T>(m_data) / x; }
    Vc_ALWAYS_INLINE Vc_PURE T operator*(T x) const { return static_cast<T>(m_data) * x; }
    Vc_ALWAYS_INLINE Vc_PURE T operator|(T x) const { return static_cast<T>(m_data) | x; }
    Vc_ALWAYS_INLINE Vc_PURE T operator&(T x) const { return static_cast<T>(m_data) & x; }
    Vc_ALWAYS_INLINE Vc_PURE T operator^(T x) const { return static_cast<T>(m_data) ^ x; }
    Vc_ALWAYS_INLINE Vc_PURE T operator%(T x) const { return static_cast<T>(m_data) % x; }
    //T operator<<(T x) const { return static_cast<T>(m_data) << x; }
    //T operator>>(T x) const { return static_cast<T>(m_data) >> x; }
#ifdef m_data
#undef m_data
#endif
};
|
||||
|
||||
} // namespace Common
|
||||
} // namespace Vc
|
||||
|
||||
#endif // VC_COMMON_ALIASINGENTRYHELPER_H_
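
// A usage sketch of the interface this helper expects from its StorageType
// parameter (illustrative only; `MyStorage` is a hypothetical type, and only
// its EntryType/assign/m/read members mirror what the class above calls):
//
//   struct MyStorage {
//       typedef float EntryType;
//       float data[4];
//       float m(int i) const { return data[i]; }      // read one lane
//       float read(int i) const { return data[i]; }
//       void assign(int i, float x) { data[i] = x; }  // write one lane
//   };
//
// A container whose operator[] returns AliasingEntryHelper<MyStorage> lets
// `v[i] += 1.f` write through to lane i without violating strict aliasing,
// either via assign() (ICC) or via the Vc_MAY_ALIAS reference (other compilers).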

@@ -0,0 +1,137 @@
/* This file is part of the Vc library. {{{
Copyright © 2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_COMMON_ALIGNEDBASE_H_
#define VC_COMMON_ALIGNEDBASE_H_

#include "types.h"
#include "macros.h"

namespace Vc_VERSIONED_NAMESPACE
{
namespace Detail
{
/**\internal
 * Break the recursion of the function below.
 */
template <typename T> constexpr T max(T a) { return a; }
/**\internal
 * \returns the maximum of all specified arguments.
 */
template <typename T, typename... Ts> constexpr T max(T a, T b, Ts... rest)
{
    return a > b ? max(a, rest...) : max(b, rest...);
}
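// For example, Detail::max(std::size_t(4), std::size_t(16), std::size_t(8))
// evaluates to 16 at compile time via the pairwise recursion above.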
}  // namespace Detail
namespace Common
{
template <std::size_t> Vc_INTRINSIC void *aligned_malloc(std::size_t);
Vc_ALWAYS_INLINE void free(void *);
}  // namespace Common

/**
 * \ingroup Utilities
 *
 * Helper class to ensure a given alignment.
 *
 * This class reimplements the \c new and \c delete operators to align objects allocated
 * on the heap suitably with the specified alignment \c Alignment.
 *
 * \see Vc::VectorAlignedBase
 * \see Vc::MemoryAlignedBase
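 *
 * Example (an illustrative sketch; \c MyData is a hypothetical user type, not
 * part of Vc):
 * \code
 * struct MyData : public Vc::AlignedBase<64>
 * {
 *     float payload[16];
 * };
 * MyData *p = new MyData;  // the reimplemented operator new returns
 *                          // 64-Byte aligned storage
 * \endcode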
 */
template <std::size_t Alignment> struct alignas(Alignment) AlignedBase
{
    Vc_FREE_STORE_OPERATORS_ALIGNED(Alignment);
};

/**
 * \ingroup Utilities
 *
 * Helper type to ensure suitable alignment for any Vc::Vector<T> type (using the default
 * VectorAbi).
 *
 * This class reimplements the \c new and \c delete operators to align objects allocated
 * on the heap suitably for objects of Vc::Vector<T> type. This is necessary since the
 * standard \c new operator does not adhere to the alignment requirements of the type.
 *
 * \see Vc::VectorAlignedBaseT
 * \see Vc::MemoryAlignedBase
 * \see Vc::AlignedBase
 */
using VectorAlignedBase = AlignedBase<
    Detail::max(alignof(Vector<float>), alignof(Vector<double>), alignof(Vector<ullong>),
                alignof(Vector<llong>), alignof(Vector<ulong>), alignof(Vector<long>),
                alignof(Vector<uint>), alignof(Vector<int>), alignof(Vector<ushort>),
                alignof(Vector<short>), alignof(Vector<uchar>), alignof(Vector<schar>))>;

/**
 * \ingroup Utilities
 * Variant of the above type ensuring suitable alignment only for the specified vector
 * type \p V.
 *
 * \see Vc::VectorAlignedBase
 * \see Vc::MemoryAlignedBaseT
 */
template <typename V> using VectorAlignedBaseT = AlignedBase<alignof(V)>;

/**
 * \ingroup Utilities
 *
 * Helper class to ensure suitable alignment for arrays of scalar objects for any
 * Vc::Vector<T> type (using the default VectorAbi).
 *
 * This class reimplements the \c new and \c delete operators to align objects allocated
 * on the heap suitably for arrays of type \p Vc::Vector<T>::EntryType. Subsequent load
 * and store operations are safe to use the aligned variant.
 *
 * \see Vc::MemoryAlignedBaseT
 * \see Vc::VectorAlignedBase
 * \see Vc::AlignedBase
 */
using MemoryAlignedBase = AlignedBase<
    Detail::max(Vector<float>::MemoryAlignment, Vector<double>::MemoryAlignment,
                Vector<ullong>::MemoryAlignment, Vector<llong>::MemoryAlignment,
                Vector<ulong>::MemoryAlignment, Vector<long>::MemoryAlignment,
                Vector<uint>::MemoryAlignment, Vector<int>::MemoryAlignment,
                Vector<ushort>::MemoryAlignment, Vector<short>::MemoryAlignment,
                Vector<uchar>::MemoryAlignment, Vector<schar>::MemoryAlignment)>;

/**
 * \ingroup Utilities
 * Variant of the above type ensuring suitable alignment only for the specified vector
 * type \p V.
 *
 * \see Vc::MemoryAlignedBase
 * \see Vc::VectorAlignedBaseT
 */
template <typename V> using MemoryAlignedBaseT = AlignedBase<V::MemoryAlignment>;
}

#endif // VC_COMMON_ALIGNEDBASE_H_

// vim: foldmethod=marker

@@ -0,0 +1,62 @@
/* This file is part of the Vc library. {{{
Copyright © 2011-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_COMMON_BITSCANINTRINSICS_H_
#define VC_COMMON_BITSCANINTRINSICS_H_

#if defined(Vc_GCC) || defined(Vc_CLANG) || defined(Vc_APPLECLANG)
#include <x86intrin.h>
#  ifndef _bit_scan_forward
#    define _bit_scan_forward(x) __builtin_ctz(x)
#include "macros.h"
static Vc_ALWAYS_INLINE Vc_CONST int _Vc_bit_scan_reverse_asm(unsigned int x) {
    int r;
    __asm__("bsr %1,%0" : "=r"(r) : "X"(x));
    return r;
}
#    define _bit_scan_reverse(x) _Vc_bit_scan_reverse_asm(x)
#  endif
#elif defined(_WIN32)
#include <intrin.h>
static inline __forceinline unsigned long _bit_scan_forward(unsigned long x) {
    unsigned long index;
    _BitScanForward(&index, x);
    return index;
}
static inline __forceinline unsigned long _bit_scan_reverse(unsigned long x) {
    unsigned long index;
    _BitScanReverse(&index, x);
    return index;
}
#elif defined(Vc_ICC)
// for all I know ICC supports the _bit_scan_* intrinsics
#else
// just assume the compiler can do it
#endif

#endif // VC_COMMON_BITSCANINTRINSICS_H_
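
// A minimal semantics sketch (illustrative; the values follow from the
// definitions above):
//
//   unsigned int bits = 0x28;           // 0b101000
//   int lsb = _bit_scan_forward(bits);  // == 3, index of the lowest set bit
//   int msb = _bit_scan_reverse(bits);  // == 5, index of the highest set bit
//
// Both results are undefined for bits == 0, which is why the gather loops
// elsewhere in Vc test the mask for emptiness before scanning it.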

@@ -0,0 +1,92 @@
/* This file is part of the Vc library. {{{
Copyright © 2013-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_COMMON_CONST_H_
#define VC_COMMON_CONST_H_

#include <type_traits>
#include "../global.h"

namespace Vc_VERSIONED_NAMESPACE
{
namespace Detail
{

template <int exponent> constexpr double exponentToFloat(std::integral_constant<bool, true>);
template <int exponent> constexpr double exponentToFloat(std::integral_constant<bool, false>);
template <> constexpr double exponentToFloat<0>(std::integral_constant<bool, true>)
{
    return 1.;
}
template <> constexpr double exponentToFloat<0>(std::integral_constant<bool, false>)
{
    return 1.;
}
template <> constexpr double exponentToFloat<-32>(std::integral_constant<bool, true>)
{
    return 1. / (65536. * 65536.);
}
template <> constexpr double exponentToFloat<32>(std::integral_constant<bool, false>)
{
    return 65536. * 65536.;
}
template <> constexpr double exponentToFloat<-64>(std::integral_constant<bool, true>)
{
    return 1. / (65536. * 65536. * 65536. * 65536.);
}
template <> constexpr double exponentToFloat<64>(std::integral_constant<bool, false>)
{
    return 65536. * 65536. * 65536. * 65536.;
}
template <int exponent>
constexpr double exponentToFloat(std::integral_constant<bool, false> negative)
{
    return exponentToFloat<exponent - 1>(negative) * 2.0;
}
template <int exponent>
constexpr double exponentToFloat(std::integral_constant<bool, true> negative)
{
    return exponentToFloat<exponent + 1>(negative) * 0.5;
}
template <int sign, unsigned long long mantissa, int exponent> constexpr double doubleConstant()
{
    return (static_cast<double>((mantissa & 0x000fffffffffffffull) | 0x0010000000000000ull) /
            0x0010000000000000ull) *
           exponentToFloat<exponent>(std::integral_constant<bool, (exponent < 0)>()) * sign;
}
template <int sign, unsigned int mantissa, int exponent> constexpr float floatConstant()
{
    return (static_cast<float>((mantissa & 0x007fffffu) | 0x00800000u) / 0x00800000u) *
           static_cast<float>(
               exponentToFloat<exponent>(std::integral_constant<bool, (exponent < 0)>())) *
           sign;
}

} // namespace Detail
} // namespace Vc

#endif // VC_COMMON_CONST_H_
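
// Worked examples (these follow directly from the formulas above; the mantissa
// bits sit below an implicit leading 1 and are scaled by 2^exponent and sign):
//
//   doubleConstant<1, 0, 2>()                   // ==  1.0 * 2^2 == 4.0
//   doubleConstant<-1, 0, 0>()                  // == -1.0
//   doubleConstant<1, 0x8000000000000ull, 0>()  // ==  1.5 (top mantissa bit set)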

@@ -0,0 +1,43 @@
/* This file is part of the Vc library. {{{
Copyright © 2013-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_COMMON_CONST_DATA_H_
#define VC_COMMON_CONST_DATA_H_

#include "macros.h"
namespace Vc_VERSIONED_NAMESPACE
{
namespace Common
{

alignas(64) extern unsigned int RandomState[];
alignas(32) extern const unsigned int AllBitsSet[8];

} // namespace Common
} // namespace Vc

#endif // VC_COMMON_CONST_DATA_H_

@@ -0,0 +1,91 @@
/* This file is part of the Vc library. {{{
Copyright © 2010-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_COMMON_DEINTERLEAVE_H_
#define VC_COMMON_DEINTERLEAVE_H_

#include "macros.h"

namespace Vc_VERSIONED_NAMESPACE
{

/**
 * \ingroup Vectors
 *
 * \deprecated Turn to InterleavedMemoryWrapper for a more flexible and complete solution.
 *
 * Loads two vectors of values from an interleaved array.
 *
 * \param a, b The vectors to load the values from memory into.
 * \param memory The memory location to read the next 2 * V::Size values from.
 * \param align Either pass Vc::Aligned or Vc::Unaligned. It defaults to Vc::Aligned if nothing is
 * specified.
 *
 * If you store your data as
 * \code
 * struct { float x, y; } m[1000];
 * \endcode
 * then the deinterleave function allows you to read \p Size concurrent x and y values like this:
 * \code
 * Vc::float_v x, y;
 * Vc::deinterleave(&x, &y, &m[10], Vc::Unaligned);
 * \endcode
 * This code will load m[10], m[12], m[14], ... into \p x and m[11], m[13], m[15], ... into \p y.
 *
 * The deinterleave function supports the following type combinations:
 \verbatim
   V \ M  | float | double | ushort | short | uint | int
 =========|=======|========|========|=======|======|=====
  float_v |   X   |        |   X    |   X   |      |
 ---------|-------|--------|--------|-------|------|-----
 double_v |       |   X    |        |       |      |
 ---------|-------|--------|--------|-------|------|-----
    int_v |       |        |        |   X   |      |  X
 ---------|-------|--------|--------|-------|------|-----
   uint_v |       |        |   X    |       |  X   |
 ---------|-------|--------|--------|-------|------|-----
  short_v |       |        |        |   X   |      |
 ---------|-------|--------|--------|-------|------|-----
 ushort_v |       |        |   X    |       |      |
 \endverbatim
 */
template<typename V, typename M, typename A> Vc_ALWAYS_INLINE void deinterleave(V *a, V *b,
                                                                                const M *memory, A align)
{
    Detail::deinterleave(*a, *b, memory, align);
}

// documented as default for align above
template<typename V, typename M> Vc_ALWAYS_INLINE void deinterleave(V *a, V *b,
                                                                    const M *memory)
{
    Detail::deinterleave(*a, *b, memory, Aligned);
}

} // namespace Vc

#endif // VC_COMMON_DEINTERLEAVE_H_

@@ -0,0 +1,137 @@
/* This file is part of the Vc library. {{{
Copyright © 2018 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_COMMON_DETAIL_H_
#define VC_COMMON_DETAIL_H_

#include <vector>

#include "macros.h"

namespace Vc_VERSIONED_NAMESPACE
{
namespace Common
{
// convertIndexVector {{{
// if the argument is a Vector<T> already we definitely want to keep it that way
template <typename IV>
Vc_INTRINSIC enable_if<(Traits::is_simd_vector<IV>::value &&
                        sizeof(typename IV::EntryType) >= sizeof(int)),
                       const IV &>
convertIndexVector(const IV &indexVector)
{
    return indexVector;
}

// but if the scalar (integral) type is smaller than int we convert it up to int. Otherwise it's
// very likely that the calculations we have to perform will overflow.
template <typename IV>
Vc_INTRINSIC enable_if<(Traits::is_simd_vector<IV>::value &&
                        sizeof(typename IV::EntryType) < sizeof(int)),
                       fixed_size_simd<int, IV::Size>>
convertIndexVector(const IV &indexVector)
{
    return static_cast<fixed_size_simd<int, IV::Size>>(indexVector);
}

// helper for promoting integral types to int or higher
template <class T> using promoted_type = decltype(std::declval<T>() + 1);

// std::array, Vc::array, and C-array are fixed size and can therefore be converted to a
// fixed_size_simd of the same size
template <typename T, std::size_t N>
Vc_INTRINSIC enable_if<std::is_integral<T>::value, fixed_size_simd<promoted_type<T>, N>>
convertIndexVector(const std::array<T, N> &indexVector)
{
    return fixed_size_simd<promoted_type<T>, N>{std::addressof(indexVector[0]),
                                                Vc::Unaligned};
}
template <typename T, std::size_t N>
Vc_INTRINSIC enable_if<std::is_integral<T>::value, fixed_size_simd<promoted_type<T>, N>>
convertIndexVector(const Vc::array<T, N> &indexVector)
{
    return fixed_size_simd<promoted_type<T>, N>{std::addressof(indexVector[0]),
                                                Vc::Unaligned};
}
template <typename T, std::size_t N>
Vc_INTRINSIC enable_if<std::is_integral<T>::value, fixed_size_simd<promoted_type<T>, N>>
convertIndexVector(const T (&indexVector)[N])
{
    return fixed_size_simd<promoted_type<T>, N>{std::addressof(indexVector[0]),
                                                Vc::Unaligned};
}

// a plain pointer won't work, because we need some information on the number of values in
// the index argument
#ifndef Vc_MSVC
// MSVC treats the function as usable in SFINAE context if it is deleted. If it's not declared we
// seem to get what we wanted (except for bad diagnostics)
template <class T>
enable_if<std::is_pointer<T>::value, void> convertIndexVector(T indexVector) = delete;
#endif

// an initializer_list works, but is runtime-sized (before C++14, at least) so we have to
// fall back to std::vector
template <typename T>
Vc_INTRINSIC std::vector<promoted_type<T>> convertIndexVector(
    const std::initializer_list<T> &indexVector)
{
    return {begin(indexVector), end(indexVector)};
}

// a std::vector cannot be converted to anything better
template <typename T>
Vc_INTRINSIC
    enable_if<(std::is_integral<T>::value && sizeof(T) >= sizeof(int)), std::vector<T>>
    convertIndexVector(const std::vector<T> &indexVector)
{
    return indexVector;
}
template <typename T>
Vc_INTRINSIC enable_if<(std::is_integral<T>::value && sizeof(T) < sizeof(int)),
                       std::vector<promoted_type<T>>>
convertIndexVector(const std::vector<T> &indexVector)
{
    return {std::begin(indexVector), std::end(indexVector)};
}

template <class T,
          class = enable_if<
              (!std::is_pointer<T>::value && !Traits::is_simd_vector<T>::value &&
               !std::is_lvalue_reference<decltype(std::declval<const T &>()[0])>::value)>>
Vc_INTRINSIC const T &convertIndexVector(const T &i)
{
    return i;
}

// }}}
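
// Dispatch summary (an illustrative sketch; the result types follow from the
// overloads above):
//
//   short idx_arr[8] = {0, 1, 2, 3, 4, 5, 6, 7};
//   auto a = convertIndexVector(idx_arr);    // fixed_size_simd<int, 8>
//   std::vector<long> idx_vec{0, 1, 2};
//   auto b = convertIndexVector(idx_vec);    // passed through as std::vector<long>
//   auto c = convertIndexVector({1, 2, 3});  // std::vector<int> via initializer_list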
} // namespace Common
} // namespace Vc_VERSIONED_NAMESPACE

#endif // VC_COMMON_DETAIL_H_

// vim: foldmethod=marker

@@ -0,0 +1,178 @@
/* This file is part of the Vc library. {{{
Copyright © 2016 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_COMMON_ELEMENTREFERENCE_H_
#define VC_COMMON_ELEMENTREFERENCE_H_

#include "macros.h"

namespace Vc_VERSIONED_NAMESPACE
{
namespace Detail
{
template <typename U, typename Accessor = U> class ElementReference
{
    friend U;
    friend Accessor;
    Vc_INTRINSIC ElementReference(U &o, int i) noexcept : index(i), obj(o) {}

    static constexpr bool get_noexcept =
        noexcept(Accessor::get(std::declval<U &>(), int()));
    template <typename T> static constexpr bool set_noexcept()
    {
        return noexcept(Accessor::set(std::declval<U &>(), int(), std::declval<T>()));
    }

public:
    using value_type = typename U::value_type;
    Vc_INTRINSIC ElementReference(const ElementReference &) = delete;

    /**
     * Move constructor
     *
     * This is the only way to construct an ElementReference in user code.
     *
     * \note
     * Please be aware that this class models the concept of a reference
     * and as such it can have the same lifetime issues as a standard C++
     * reference.
     *
     * \note
     * C++17 supports copy-elision, which allows returning the
     * ElementReference obtained via operator[] from a function without
     * copying. C++11 and C++14 don't offer this, thus we add the move
     * constructor to allow moving the data and thus avoid copying (which
     * is otherwise prohibited by the deleted copy constructor above).
     */
    Vc_INTRINSIC ElementReference(ElementReference &&) = default;

    Vc_INTRINSIC operator value_type() const noexcept(get_noexcept)
    {
        return Accessor::get(obj, index);
    }

    template <typename T>
    Vc_INTRINSIC ElementReference &operator=(T &&x) &&
        noexcept(noexcept(Accessor::set(std::declval<U &>(), int(), std::declval<T>())))
    {
        Accessor::set(obj, index, std::forward<T>(x));
        return *this;
    }

    // TODO: improve with operator.()

#define Vc_OP_(op_)                                                                     \
    template <typename T, typename R = decltype(std::declval<const value_type &>()     \
                                                    op_ std::declval<T>())>             \
    Vc_INTRINSIC ElementReference &operator op_##=(T &&x) &&                            \
        noexcept(get_noexcept && noexcept(Accessor::set(std::declval<U &>(), int(),    \
                                                        std::declval<R &&>())))        \
    {                                                                                   \
        const value_type &lhs = Accessor::get(obj, index);                              \
        Accessor::set(obj, index, lhs op_ std::forward<T>(x));                          \
        return *this;                                                                   \
    }
    Vc_ALL_ARITHMETICS(Vc_OP_);
    Vc_ALL_SHIFTS(Vc_OP_);
    Vc_ALL_BINARY(Vc_OP_);
#undef Vc_OP_

    template <typename = void>
    Vc_INTRINSIC ElementReference &operator++() &&
        noexcept(noexcept(std::declval<value_type &>() =
                              Accessor::get(std::declval<U &>(), int())) &&
                 set_noexcept<decltype(++std::declval<value_type &>())>())
    {
        value_type x = Accessor::get(obj, index);
        Accessor::set(obj, index, ++x);
        return *this;
    }

    template <typename = void>
    Vc_INTRINSIC value_type operator++(int) &&
        noexcept(noexcept(std::declval<value_type &>() =
                              Accessor::get(std::declval<U &>(), int())) &&
                 set_noexcept<decltype(std::declval<value_type &>()++)>())
    {
        const value_type r = Accessor::get(obj, index);
        value_type x = r;
        Accessor::set(obj, index, ++x);
        return r;
    }

    template <typename = void>
    Vc_INTRINSIC ElementReference &operator--() &&
        noexcept(noexcept(std::declval<value_type &>() =
                              Accessor::get(std::declval<U &>(), int())) &&
                 set_noexcept<decltype(--std::declval<value_type &>())>())
    {
        value_type x = Accessor::get(obj, index);
        Accessor::set(obj, index, --x);
        return *this;
    }

    template <typename = void>
    Vc_INTRINSIC value_type operator--(int) &&
        noexcept(noexcept(std::declval<value_type &>() =
                              Accessor::get(std::declval<U &>(), int())) &&
                 set_noexcept<decltype(std::declval<value_type &>()--)>())
    {
        const value_type r = Accessor::get(obj, index);
        value_type x = r;
        Accessor::set(obj, index, --x);
        return r;
    }

    friend void swap(ElementReference &&a, ElementReference &&b) {
        value_type tmp(a);
        static_cast<ElementReference &&>(a) = static_cast<value_type>(b);
        static_cast<ElementReference &&>(b) = tmp;
    }

    friend void swap(value_type &a, ElementReference &&b) {
        value_type tmp(a);
        a = static_cast<value_type>(b);
        static_cast<ElementReference &&>(b) = tmp;
    }

    friend void swap(ElementReference &&a, value_type &b) {
        value_type tmp(a);
        static_cast<ElementReference &&>(a) = b;
        b = tmp;
    }

private:
    int index;
    U &obj;
};

} // namespace Detail
} // namespace Vc

#endif // VC_COMMON_ELEMENTREFERENCE_H_

// vim: foldmethod=marker
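
// Usage sketch (illustrative; it assumes a vector type whose non-const
// operator[] returns this proxy, which is how Vc's vector types are expected
// to use it):
//
//   Vc::float_v v = Vc::float_v::Zero();
//   v[0] = 1.f;      // rvalue proxy assignment: operator=(T &&) &&
//   v[0] += 2.f;     // compound assignment via Accessor::get/set
//   float x = v[0];  // conversion operator: x == 3.f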

@@ -0,0 +1,91 @@
/* This file is part of the Vc library. {{{
Copyright © 2012-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-------------------------------------------------------------------

The exp implementation is derived from Cephes, which carries the
following Copyright notice:

Cephes Math Library Release 2.2: June, 1992
Copyright 1984, 1987, 1989 by Stephen L. Moshier
Direct inquiries to 30 Frost Street, Cambridge, MA 02140

}}}*/

#ifdef Vc_COMMON_MATH_H_INTERNAL

constexpr float log2_e = 1.44269504088896341f;

// These constants are adjusted to account for single-precision floating point.
// The original values are for double precision:
//
// constexpr float MAXLOGF = 88.72283905206835f;
// constexpr float MINLOGF = -103.278929903431851103f; /* log(2^-149) */

constexpr float MAXLOGF = 88.722831726074219f;  /* log(2^127.99998474121094f) */
constexpr float MINLOGF = -88.029685974121094f; /* log(2^-126.99999237060547f) */
constexpr float MAXNUMF = 3.4028234663852885981170418348451692544e38f;

template <typename Abi, typename = enable_if<std::is_same<Abi, VectorAbi::Sse>::value ||
                                             std::is_same<Abi, VectorAbi::Avx>::value>>
inline Vector<float, detail::not_fixed_size_abi<Abi>> exp(Vector<float, Abi> x)
{
    using V = Vector<float, Abi>;
    typedef typename V::Mask M;
    typedef Detail::Const<float, Abi> C;

    const M overflow  = x > MAXLOGF;
    const M underflow = x < MINLOGF;

    // log₂(eˣ) = x * log₂(e) * log₂(2)
    //          = log₂(2^(x * log₂(e)))
    // => eˣ = 2^(x * log₂(e))
    // => n  = ⌊x * log₂(e) + ½⌋
    // => y  = x - n * ln(2)       | recall that: ln(2) * log₂(e) == 1
    // <=> eˣ = 2ⁿ * eʸ
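    // Worked example for x = 1 (illustrative): z = ⌊1·log₂(e) + ½⌋ = ⌊1.9427⌋ = 1,
    // so n = 1 and y = 1 - ln(2) ≈ 0.30685; the polynomial below gives
    // eʸ ≈ 1.35914, hence eˣ = 2¹ · 1.35914 ≈ 2.71828.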
    V z = floor(C::log2_e() * x + 0.5f);
    const auto n = static_cast<Vc::SimdArray<int, V::Size>>(z);
    x -= z * C::ln2_large();
    x -= z * C::ln2_small();

    /* Theoretical peak relative error in [-0.5, +0.5] is 4.2e-9. */
    z = ((((( 1.9875691500E-4f  * x
            + 1.3981999507E-3f) * x
            + 8.3334519073E-3f) * x
            + 4.1665795894E-2f) * x
            + 1.6666665459E-1f) * x
            + 5.0000001201E-1f) * (x * x)
            + x
            + 1.0f;

    x = ldexp(z, n);  // == z * 2ⁿ

    x(overflow) = std::numeric_limits<typename V::EntryType>::infinity();
    x.setZero(underflow);

    return x;
}

#endif // Vc_COMMON_MATH_H_INTERNAL

@@ -0,0 +1,79 @@
/*{{{
Copyright (C) 2013-2015 Matthias Kretz <kretz@kde.org>

Permission to use, copy, modify, and distribute this software
and its documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appear in all
copies and that both that the copyright notice and this
permission notice and warranty disclaimer appear in supporting
documentation, and that the name of the author not be used in
advertising or publicity pertaining to distribution of the
software without specific, written prior permission.

The author disclaim all warranties with regard to this
software, including all implied warranties of merchantability
and fitness. In no event shall the author be liable for any
special, indirect or consequential damages or any damages
whatsoever resulting from loss of use, data or profits, whether
in an action of contract, negligence or other tortious action,
arising out of or in connection with the use or performance of
this software.

}}}*/

#ifndef VC_COMMON_FIX_CLANG_EMMINTRIN_H_
#define VC_COMMON_FIX_CLANG_EMMINTRIN_H_

#include "../global.h"

#if (defined Vc_CLANG && Vc_CLANG < 0x30700) || (defined Vc_APPLECLANG && Vc_APPLECLANG < 0x70000)

#ifdef _mm_slli_si128
#undef _mm_slli_si128
#define _mm_slli_si128(a, count) __extension__ ({ \
    (__m128i)__builtin_ia32_pslldqi128((__m128i)(a), (count)*8); })
#endif

#ifdef _mm_srli_si128
#undef _mm_srli_si128
#define _mm_srli_si128(a, count) __extension__ ({ \
    (__m128i)__builtin_ia32_psrldqi128((__m128i)(a), (count)*8); })
#endif

#ifdef _mm_shuffle_epi32
#undef _mm_shuffle_epi32
#define _mm_shuffle_epi32(a, imm) __extension__ ({ \
    (__m128i)__builtin_shufflevector((__v4si)(__m128i)(a), (__v4si) _mm_set1_epi32(0), \
                                     (imm) & 0x3, ((imm) & 0xc) >> 2, \
                                     ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6); })
#endif
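
// For reference: the immediate encodes one source lane per two bits, so with
// imm == 0x4E (0b01001110) the replacement above selects lanes (2, 3, 0, 1),
// i.e. it swaps the two 64-bit halves of the register.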

#ifdef _mm_shufflelo_epi16
#undef _mm_shufflelo_epi16
#define _mm_shufflelo_epi16(a, imm) __extension__ ({ \
    (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(a), (__v8hi) _mm_set1_epi16(0), \
                                     (imm) & 0x3, ((imm) & 0xc) >> 2, \
                                     ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6, \
                                     4, 5, 6, 7); })
#endif

#ifdef _mm_shufflehi_epi16
#undef _mm_shufflehi_epi16
#define _mm_shufflehi_epi16(a, imm) __extension__ ({ \
    (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(a), (__v8hi) _mm_set1_epi16(0), \
                                     0, 1, 2, 3, \
                                     4 + (((imm) & 0x03) >> 0), \
                                     4 + (((imm) & 0x0c) >> 2), \
                                     4 + (((imm) & 0x30) >> 4), \
                                     4 + (((imm) & 0xc0) >> 6)); })
#endif

#ifdef _mm_shuffle_pd
#undef _mm_shuffle_pd
#define _mm_shuffle_pd(a, b, i) __extension__ ({ \
    __builtin_shufflevector((__m128d)(a), (__m128d)(b), (i) & 1, (((i) & 2) >> 1) + 2); })
#endif

#endif // Vc_CLANG || Vc_APPLECLANG

#endif // VC_COMMON_FIX_CLANG_EMMINTRIN_H_

@@ -0,0 +1,318 @@
/* This file is part of the Vc library. {{{
Copyright © 2014-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_COMMON_GATHERIMPLEMENTATION_H_
#define VC_COMMON_GATHERIMPLEMENTATION_H_

#include "macros.h"

namespace Vc_VERSIONED_NAMESPACE
{
namespace Common
{

enum class GatherScatterImplementation : int {
    SimpleLoop,
    SetIndexZero,
    BitScanLoop,
    PopcntSwitch
};

using SimpleLoopT   = std::integral_constant<GatherScatterImplementation, GatherScatterImplementation::SimpleLoop>;
using SetIndexZeroT = std::integral_constant<GatherScatterImplementation, GatherScatterImplementation::SetIndexZero>;
using BitScanLoopT  = std::integral_constant<GatherScatterImplementation, GatherScatterImplementation::BitScanLoop>;
using PopcntSwitchT = std::integral_constant<GatherScatterImplementation, GatherScatterImplementation::PopcntSwitch>;

template <typename V, typename MT, typename IT>
Vc_ALWAYS_INLINE void executeGather(SetIndexZeroT,
                                    V &v,
                                    const MT *mem,
                                    IT &&indexes_,
                                    typename V::MaskArgument mask)
{
    auto indexes = std::forward<IT>(indexes_);
    indexes.setZeroInverted(static_cast<decltype(!indexes)>(mask));
    const V tmp(mem, indexes);
    where(mask) | v = tmp;
}
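// For example, with mask = {true, false, true, false} and indexes = {8, 9, 10, 11}
// the gather above reads offsets {8, 0, 10, 0} (masked-off lanes read the element
// at offset 0, which must therefore be valid memory), and the write-masked blend
// keeps v's old values in lanes 1 and 3.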

template <typename V, typename MT, typename IT>
Vc_ALWAYS_INLINE void executeGather(SimpleLoopT, V &v, const MT *mem, const IT &indexes,
                                    const typename V::MaskArgument mask)
{
    if (Vc_IS_UNLIKELY(mask.isEmpty())) {
        return;
    }
#if defined Vc_GCC && Vc_GCC >= 0x40900
    // GCC 4.8 doesn't support a dependent type and constexpr vector_size argument
    constexpr std::size_t Sizeof = sizeof(V);
    using Builtin [[gnu::vector_size(Sizeof)]] = typename V::value_type;
    Builtin tmp = reinterpret_cast<Builtin>(v.data());
    Common::unrolled_loop<std::size_t, 0, V::Size>([&](std::size_t i) {
        if (mask[i]) {
            tmp[i] = mem[indexes[i]];
        }
    });
    v.data() = reinterpret_cast<typename V::VectorType>(tmp);
#else
    Common::unrolled_loop<std::size_t, 0, V::Size>([&](std::size_t i) {
        if (mask[i])
            v[i] = mem[indexes[i]];
    });
#endif
}

template <typename V, typename MT, typename IT>
Vc_ALWAYS_INLINE void executeGather(BitScanLoopT,
                                    V &v,
                                    const MT *mem,
                                    const IT &indexes,
                                    typename V::MaskArgument mask)
{
#ifdef Vc_GNU_ASM
    size_t bits = mask.toInt();
    while (Vc_IS_LIKELY(bits > 0)) {
        size_t i, j;
        asm("bsf %[bits],%[i]\n\t"
            "bsr %[bits],%[j]\n\t"
            "btr %[i],%[bits]\n\t"
            "btr %[j],%[bits]\n\t"
            : [i] "=r"(i), [j] "=r"(j), [bits] "+r"(bits));
        v[i] = mem[indexes[i]];
        v[j] = mem[indexes[j]];
    }
#else
    // Alternative from Vc::SSE (0.7)
    int bits = mask.toInt();
    while (bits) {
        const int i = _bit_scan_forward(bits);
        bits &= bits - 1;
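        // `bits &= bits - 1` clears the lowest set bit; e.g. bits = 0b10100
        // yields i = 2 and leaves bits = 0b10000 for the next iteration.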
        v[i] = mem[indexes[i]];
    }
#endif // Vc_GNU_ASM
}

template <typename V, typename MT, typename IT>
Vc_ALWAYS_INLINE void executeGather(PopcntSwitchT,
                                    V &v,
                                    const MT *mem,
                                    const IT &indexes,
                                    typename V::MaskArgument mask,
                                    enable_if<V::Size == 16> = nullarg)
{
    unsigned int bits = mask.toInt();
    unsigned int low, high = 0;
    switch (Vc::Detail::popcnt16(bits)) {
    case 16:
        v.gather(mem, indexes);
        break;
    case 15:
        low = _bit_scan_forward(bits);
        bits ^= 1 << low;
        v[low] = mem[indexes[low]];
        // fallthrough
    case 14:
        high = _bit_scan_reverse(bits);
        v[high] = mem[indexes[high]];
        high = (1 << high);
        // fallthrough
    case 13:
        low = _bit_scan_forward(bits);
        bits ^= high | (1 << low);
        v[low] = mem[indexes[low]];
        // fallthrough
    case 12:
        high = _bit_scan_reverse(bits);
        v[high] = mem[indexes[high]];
        high = (1 << high);
        // fallthrough
    case 11:
        low = _bit_scan_forward(bits);
        bits ^= high | (1 << low);
        v[low] = mem[indexes[low]];
        // fallthrough
    case 10:
        high = _bit_scan_reverse(bits);
        v[high] = mem[indexes[high]];
        high = (1 << high);
        // fallthrough
    case 9:
        low = _bit_scan_forward(bits);
        bits ^= high | (1 << low);
        v[low] = mem[indexes[low]];
        // fallthrough
    case 8:
        high = _bit_scan_reverse(bits);
        v[high] = mem[indexes[high]];
        high = (1 << high);
        // fallthrough
    case 7:
        low = _bit_scan_forward(bits);
        bits ^= high | (1 << low);
        v[low] = mem[indexes[low]];
        // fallthrough
    case 6:
        high = _bit_scan_reverse(bits);
        v[high] = mem[indexes[high]];
        high = (1 << high);
        // fallthrough
    case 5:
        low = _bit_scan_forward(bits);
        bits ^= high | (1 << low);
        v[low] = mem[indexes[low]];
        // fallthrough
    case 4:
        high = _bit_scan_reverse(bits);
        v[high] = mem[indexes[high]];
        high = (1 << high);
        // fallthrough
    case 3:
        low = _bit_scan_forward(bits);
        bits ^= high | (1 << low);
        v[low] = mem[indexes[low]];
        // fallthrough
    case 2:
        high = _bit_scan_reverse(bits);
        v[high] = mem[indexes[high]];
        // fallthrough
    case 1:
        low = _bit_scan_forward(bits);
        v[low] = mem[indexes[low]];
        // fallthrough
    case 0:
        break;
    }
}
template <typename V, typename MT, typename IT>
Vc_ALWAYS_INLINE void executeGather(PopcntSwitchT,
                                    V &v,
                                    const MT *mem,
                                    const IT &indexes,
                                    typename V::MaskArgument mask,
                                    enable_if<V::Size == 8> = nullarg)
{
    unsigned int bits = mask.toInt();
    unsigned int low, high = 0;
    switch (Vc::Detail::popcnt8(bits)) {
    case 8:
        v.gather(mem, indexes);
        break;
    case 7:
        low = _bit_scan_forward(bits);
        bits ^= 1 << low;
        v[low] = mem[indexes[low]];
        // fallthrough
    case 6:
        high = _bit_scan_reverse(bits);
        v[high] = mem[indexes[high]];
        high = (1 << high);
        // fallthrough
    case 5:
        low = _bit_scan_forward(bits);
        bits ^= high | (1 << low);
        v[low] = mem[indexes[low]];
        // fallthrough
    case 4:
        high = _bit_scan_reverse(bits);
        v[high] = mem[indexes[high]];
        high = (1 << high);
        // fallthrough
    case 3:
        low = _bit_scan_forward(bits);
        bits ^= high | (1 << low);
        v[low] = mem[indexes[low]];
        // fallthrough
    case 2:
        high = _bit_scan_reverse(bits);
        v[high] = mem[indexes[high]];
        // fallthrough
    case 1:
        low = _bit_scan_forward(bits);
        v[low] = mem[indexes[low]];
        // fallthrough
    case 0:
        break;
    }
}
template <typename V, typename MT, typename IT>
Vc_ALWAYS_INLINE void executeGather(PopcntSwitchT,
                                    V &v,
                                    const MT *mem,
                                    const IT &indexes,
                                    typename V::MaskArgument mask,
                                    enable_if<V::Size == 4> = nullarg)
{
    unsigned int bits = mask.toInt();
    unsigned int low, high = 0;
    switch (Vc::Detail::popcnt4(bits)) {
    case 4:
        v.gather(mem, indexes);
        break;
    case 3:
        low = _bit_scan_forward(bits);
        bits ^= 1 << low;
        v[low] = mem[indexes[low]];
        // fallthrough
    case 2:
        high = _bit_scan_reverse(bits);
        v[high] = mem[indexes[high]];
        // fallthrough
    case 1:
        low = _bit_scan_forward(bits);
        v[low] = mem[indexes[low]];
        // fallthrough
    case 0:
        break;
    }
}
template <typename V, typename MT, typename IT>
Vc_ALWAYS_INLINE void executeGather(PopcntSwitchT,
                                    V &v,
                                    const MT *mem,
                                    const IT &indexes,
                                    typename V::MaskArgument mask,
                                    enable_if<V::Size == 2> = nullarg)
{
    unsigned int bits = mask.toInt();
    unsigned int low;
    switch (Vc::Detail::popcnt4(bits)) {
    case 2:
        v.gather(mem, indexes);
        break;
    case 1:
        low = _bit_scan_forward(bits);
        v[low] = mem[indexes[low]];
        // fallthrough
    case 0:
        break;
    }
}

} // namespace Common
} // namespace Vc

#endif // VC_COMMON_GATHERIMPLEMENTATION_H_

@@ -0,0 +1,221 @@
/* This file is part of the Vc library. {{{
Copyright © 2014-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef Vc_CURRENT_CLASS_NAME
#error "incorrect use of common/gatherinterface.h: Vc_CURRENT_CLASS_NAME must be defined to the current class name for declaring constructors."
#endif

///////////////////////////////////////////////////////////////////////////////////////////
// gathers
// A gather takes the following arguments:
// 1. A const pointer to memory of any type that can convert to EntryType
// 2. An indexes “vector”. The requirement is that the type implements the subscript operator,
//    stores «Size» valid index values, and each offset to the pointer above yields a valid
//    memory location for reading.
// 3. Optionally the third argument may be a mask. The mask disables several memory reads and
//    thus removes the requirements in (2.) for the disabled entries.
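//
// A sketch of the unmasked form (illustrative; float_v is the usual Vc alias,
// and the index arithmetic mirrors the documentation further below):
//
//   float data[100] = { /* ... */ };
//   auto idx = float_v::IndexType::IndexesFromZero() * 10;  // {0, 10, 20, ...}
//   float_v v(&data[0], idx);  // v[i] == data[idx[i]]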
|
||||
|
||||
private:
|
||||
/**\internal
|
||||
* This function implements a gather given a pointer to memory \p mem and some
|
||||
* container object storing the gather \p indexes.
|
||||
*
|
||||
* \param mem This pointer must be aligned correctly for the type \p MT. This is the
|
||||
* natural behavior of C++, so this is typically the case.
|
||||
* \param indexes This object contains at least \VSize{T} indexes that denote the
|
||||
* offset in \p mem where the components for the current vector should be copied from.
|
||||
* The offset is not in Bytes, but in multiples of `sizeof(MT)`.
|
||||
*/
|
||||
// enable_if<std::can_convert<MT, EntryType>::value &&
|
||||
// has_subscript_operator<IT>::value>
|
||||
template <class MT, class IT, int Scale = 1>
|
||||
inline void gatherImplementation(const Common::GatherArguments<MT, IT, Scale> &);
|
||||
|
||||
/**\internal
|
||||
* This overload of the above function adds a \p mask argument to disable memory
|
||||
* accesses at the \p indexes offsets where \p mask is \c false.
|
||||
*/
|
||||
template <class MT, class IT, int Scale = 1>
|
||||
inline void gatherImplementation(const Common::GatherArguments<MT, IT, Scale> &,
|
||||
MaskArgument mask);
|
||||
|
||||
public:
|
||||
#define Vc_ASSERT_GATHER_PARAMETER_TYPES_ \
|
||||
static_assert( \
|
||||
std::is_convertible<MT, EntryType>::value, \
|
||||
"The memory pointer needs to point to a type that can be converted to the " \
|
||||
"EntryType of this SIMD vector type."); \
|
||||
static_assert( \
|
||||
Vc::Traits::has_subscript_operator<IT>::value, \
|
||||
"The indexes argument must be a type that implements the subscript operator."); \
|
||||
static_assert( \
|
||||
!Traits::is_simd_vector<IT>::value || \
|
||||
Traits::simd_vector_size<IT>::value >= Size, \
|
||||
"If you use a SIMD vector for the indexes parameter, the index vector must " \
|
||||
"have at least as many entries as this SIMD vector."); \
|
||||
static_assert( \
|
||||
!std::is_array<T>::value || \
|
||||
(std::rank<T>::value == 1 && \
|
||||
(std::extent<T>::value == 0 || std::extent<T>::value >= Size)), \
|
||||
"If you use a simple array for the indexes parameter, the array must have " \
|
||||
"at least as many entries as this SIMD vector.")
|
||||
|
||||
/**
|
||||
* \name Gather constructors and member functions
|
||||
*
|
||||
* Constructs or loads a vector from the objects at `mem[indexes[0]]`,
|
||||
* `mem[indexes[1]]`, `mem[indexes[2]]`, ...
|
||||
*
|
||||
* All gather functions optionally take a mask as last argument. In that case only the
|
||||
* entries that are selected in the mask are accessed in memory and copied to the
|
||||
 * vector. This allows invalid indexes in the \p indexes vector, provided they are
 * masked off in \p mask.
|
||||
*
|
||||
* Gathers from structured data (AoS: arrays of struct) are possible via a special
|
||||
* subscript operator of the container (array). You can use \ref Vc::array and \ref
|
||||
* Vc::vector as drop-in replacements for \c std::array and \c std::vector. These
|
||||
* container classes contain the necessary subscript operator overload. Example:
|
||||
* \code
|
||||
* Vc::vector<float> data(100);
|
||||
* std::iota(data.begin(), data.end(), 0.f); // fill with values 0, 1, 2, ...
|
||||
* auto indexes = float_v::IndexType::IndexesFromZero();
|
||||
* float_v gathered = data[indexes]; // gathered == [0, 1, 2, ...]
|
||||
* \endcode
|
||||
*
|
||||
* This also works for gathers into arrays of structures:
|
||||
* \code
|
||||
* struct Point { float x, y, z; };
|
||||
* Vc::array<Point, 100> points;
|
||||
* // fill points ...
|
||||
* auto indexes = float_v::IndexType::IndexesFromZero();
|
||||
 * float_v xs = points[indexes][&Point::x]; // [points[0].x, points[1].x, points[2].x, ...]
 * float_v ys = points[indexes][&Point::y]; // [points[0].y, points[1].y, points[2].y, ...]
 * float_v zs = points[indexes][&Point::z]; // [points[0].z, points[1].z, points[2].z, ...]
|
||||
* \endcode
|
||||
*
|
||||
* Alternatively, you can use Vc::Common::AdaptSubscriptOperator to extend a given
|
||||
* container class with the necessary subscript operator. Example:
|
||||
* \code
|
||||
* template <typename T, typename Allocator = std::allocator<T>>
|
||||
* using my_vector = Vc::Common::AdaptSubscriptOperator<std::vector<T, Allocator>>;
|
||||
* \endcode
|
||||
*
|
||||
* \param mem A pointer to memory which contains objects of type \p MT at the offsets
|
||||
* given by \p indexes.
|
||||
* \param indexes A container/vector of offsets into \p mem.
|
||||
* The type of \p indexes (\p IT) may either be a pointer to integers
|
||||
 * (C-array) or a vector of integers (preferably IndexType).
|
||||
* \param mask If a mask is given, only the active entries will be copied from memory.
|
||||
*
|
||||
* \note If you use a masked gather constructor the masked-off entries of the vector
|
||||
 * are zero-initialized.
|
||||
*/
|
||||
///@{
|
||||
|
||||
/// Gather constructor
|
||||
template <typename MT, typename IT,
|
||||
typename = enable_if<Traits::has_subscript_operator<IT>::value>>
|
||||
Vc_INTRINSIC Vc_CURRENT_CLASS_NAME(const MT *mem, const IT &indexes)
|
||||
{
|
||||
Vc_ASSERT_GATHER_PARAMETER_TYPES_;
|
||||
gatherImplementation(
|
||||
Common::make_gather<1>(mem, Common::convertIndexVector(indexes)));
|
||||
}
|
||||
|
||||
template <class MT, class IT, int Scale>
|
||||
Vc_INTRINSIC Vc_CURRENT_CLASS_NAME(const Common::GatherArguments<MT, IT, Scale> &args)
|
||||
{
|
||||
Vc_ASSERT_GATHER_PARAMETER_TYPES_;
|
||||
gatherImplementation(args);
|
||||
}
|
||||
|
||||
/// Masked gather constructor
|
||||
template <typename MT, typename IT,
|
||||
typename = enable_if<Vc::Traits::has_subscript_operator<IT>::value>>
|
||||
Vc_INTRINSIC Vc_CURRENT_CLASS_NAME(const MT *mem, const IT &indexes,
|
||||
MaskArgument mask)
|
||||
{
|
||||
Vc_ASSERT_GATHER_PARAMETER_TYPES_;
|
||||
gatherImplementation(
|
||||
Common::make_gather<1>(mem, Common::convertIndexVector(indexes)), mask);
|
||||
}
|
||||
|
||||
template <class MT, class IT, int Scale>
|
||||
Vc_INTRINSIC Vc_CURRENT_CLASS_NAME(const Common::GatherArguments<MT, IT, Scale> &args,
|
||||
MaskArgument mask)
|
||||
{
|
||||
Vc_ASSERT_GATHER_PARAMETER_TYPES_;
|
||||
gatherImplementation(args, mask);
|
||||
}
|
||||
|
||||
/// Gather function
|
||||
template <typename MT, typename IT,
|
||||
typename = enable_if<Vc::Traits::has_subscript_operator<IT>::value>>
|
||||
Vc_INTRINSIC void gather(const MT *mem, const IT &indexes)
|
||||
{
|
||||
Vc_ASSERT_GATHER_PARAMETER_TYPES_;
|
||||
gatherImplementation(
|
||||
Common::make_gather<1>(mem, Common::convertIndexVector(indexes)));
|
||||
}
|
||||
|
||||
/// Masked gather function
|
||||
template <typename MT, typename IT,
|
||||
typename = enable_if<Vc::Traits::has_subscript_operator<IT>::value>>
|
||||
Vc_INTRINSIC void gather(const MT *mem, const IT &indexes, MaskArgument mask)
|
||||
{
|
||||
Vc_ASSERT_GATHER_PARAMETER_TYPES_;
|
||||
gatherImplementation(
|
||||
Common::make_gather<1>(mem, Common::convertIndexVector(indexes)), mask);
|
||||
}
|
||||
///@}
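
// Usage sketch (illustrative; container contents and the mask are hypothetical):
//   Vc::vector<float> table(256);
//   auto idx = float_v::IndexType::IndexesFromZero();
//   float_v v;
//   v.gather(table.data(), idx);           // unmasked gather
//   v.gather(table.data(), idx, v > 0.f);  // masked: only active lanes are loaded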
|
||||
|
||||
#include "gatherinterface_deprecated.h"
|
||||
|
||||
/**\internal
|
||||
* \name Gather function to use from Vc::Common::subscript_operator
|
||||
*
|
||||
* \param args
|
||||
* \param mask
|
||||
*/
|
||||
///@{
|
||||
template <class MT, class IT, int Scale>
|
||||
Vc_INTRINSIC void gather(const Common::GatherArguments<MT, IT, Scale> &args)
|
||||
{
|
||||
Vc_ASSERT_GATHER_PARAMETER_TYPES_;
|
||||
gatherImplementation(args);
|
||||
}
|
||||
|
||||
template <class MT, class IT, int Scale>
|
||||
Vc_INTRINSIC void gather(const Common::GatherArguments<MT, IT, Scale> &args,
|
||||
MaskArgument mask)
|
||||
{
|
||||
Vc_ASSERT_GATHER_PARAMETER_TYPES_;
|
||||
gatherImplementation(args, mask);
|
||||
}
|
||||
///@}
|
||||
|
||||
#undef Vc_ASSERT_GATHER_PARAMETER_TYPES_
|
|
@ -0,0 +1,300 @@
|
|||
/// \name Deprecated Members
|
||||
///@{
|
||||
|
||||
/**
|
||||
* \deprecated Use Vc::array or Vc::vector subscripting instead.
|
||||
*
|
||||
* \param array A pointer into memory (without alignment restrictions).
|
||||
* \param member1 If \p array points to a struct, \p member1 determines the member in the struct to
|
||||
* be read. Thus the offsets in \p indexes are relative to the \p array and not to
|
||||
* the size of the gathered type (i.e. array[i].*member1 is accessed instead of
|
||||
* (&(array->*member1))[i])
|
||||
* \param indexes Determines the offsets into \p array where the values are gathered from/scattered
|
||||
* to. The type of indexes can either be an integer vector or a type that supports
|
||||
* operator[] access.
|
||||
*/
|
||||
template <typename S1, typename IT>
|
||||
Vc_DEPRECATED("use the subscript operator to Vc::array or Vc::vector "
|
||||
"instead.") inline Vc_CURRENT_CLASS_NAME(const S1 *array,
|
||||
const EntryType S1::*member1,
|
||||
IT indexes)
|
||||
{
|
||||
gather(Common::SubscriptOperation<const S1, IT, std::ratio<1, 1>, true>(
|
||||
array, indexes)[member1]
|
||||
.gatherArguments());
|
||||
}
|
||||
|
||||
/**
|
||||
* \deprecated Use Vc::array or Vc::vector subscripting instead.
|
||||
*
|
||||
* \param array A pointer into memory (without alignment restrictions).
|
||||
* \param member1 If \p array points to a struct, \p member1 determines the member in the struct to
|
||||
* be read. Thus the offsets in \p indexes are relative to the \p array and not to
|
||||
* the size of the gathered type (i.e. array[i].*member1 is accessed instead of
|
||||
* (&(array->*member1))[i])
|
||||
* \param indexes Determines the offsets into \p array where the values are gathered from/scattered
|
||||
* to. The type of indexes can either be an integer vector or a type that supports
|
||||
* operator[] access.
|
||||
* \param mask If a mask is given only the active entries will be gathered/scattered.
|
||||
*/
|
||||
template <typename S1, typename IT>
|
||||
Vc_DEPRECATED("use the subscript operator to Vc::array or Vc::vector "
|
||||
"instead.") inline Vc_CURRENT_CLASS_NAME(const S1 *array,
|
||||
const EntryType S1::*member1,
|
||||
IT indexes, MaskArgument mask)
|
||||
{
|
||||
gather(Common::SubscriptOperation<const S1, IT, std::ratio<1, 1>, true>(
|
||||
array, indexes)[member1]
|
||||
.gatherArguments(),
|
||||
mask);
|
||||
}
|
||||
|
||||
/**
|
||||
* \deprecated Use Vc::array or Vc::vector subscripting instead.
|
||||
*
|
||||
* \param array A pointer into memory (without alignment restrictions).
|
||||
* \param member1 If \p array points to a struct, \p member1 determines the member in the struct to
|
||||
* be read. Thus the offsets in \p indexes are relative to the \p array and not to
|
||||
* the size of the gathered type (i.e. array[i].*member1 is accessed instead of
|
||||
* (&(array->*member1))[i])
|
||||
* \param member2 If \p member1 is a struct then \p member2 selects the member to be read from that
|
||||
* struct (i.e. array[i].*member1.*member2 is read).
|
||||
* \param indexes Determines the offsets into \p array where the values are gathered from/scattered
|
||||
* to. The type of indexes can either be an integer vector or a type that supports
|
||||
* operator[] access.
|
||||
*/
|
||||
template <typename S1, typename S2, typename IT>
|
||||
Vc_DEPRECATED("use the subscript operator to Vc::array or Vc::vector "
|
||||
"instead.") inline Vc_CURRENT_CLASS_NAME(const S1 *array,
|
||||
const S2 S1::*member1,
|
||||
const EntryType S2::*member2,
|
||||
IT indexes)
|
||||
{
|
||||
gather(Common::SubscriptOperation<const S1, IT, std::ratio<1, 1>, true>(
|
||||
array, indexes)[member1][member2]
|
||||
.gatherArguments());
|
||||
}
|
||||
|
||||
/**
|
||||
* \deprecated Use Vc::array or Vc::vector subscripting instead.
|
||||
*
|
||||
* \param array A pointer into memory (without alignment restrictions).
|
||||
* \param member1 If \p array points to a struct, \p member1 determines the member in the struct to
|
||||
* be read. Thus the offsets in \p indexes are relative to the \p array and not to
|
||||
* the size of the gathered type (i.e. array[i].*member1 is accessed instead of
|
||||
* (&(array->*member1))[i])
|
||||
* \param member2 If \p member1 is a struct then \p member2 selects the member to be read from that
|
||||
* struct (i.e. array[i].*member1.*member2 is read).
|
||||
* \param indexes Determines the offsets into \p array where the values are gathered from/scattered
|
||||
* to. The type of indexes can either be an integer vector or a type that supports
|
||||
* operator[] access.
|
||||
* \param mask If a mask is given only the active entries will be gathered/scattered.
|
||||
*/
|
||||
template <typename S1, typename S2, typename IT>
|
||||
Vc_DEPRECATED("use the subscript operator to Vc::array or Vc::vector "
|
||||
"instead.") inline Vc_CURRENT_CLASS_NAME(const S1 *array,
|
||||
const S2 S1::*member1,
|
||||
const EntryType S2::*member2,
|
||||
IT indexes, MaskArgument mask)
|
||||
{
|
||||
gather(Common::SubscriptOperation<const S1, IT, std::ratio<1, 1>, true>(
|
||||
array, indexes)[member1][member2]
|
||||
.gatherArguments(),
|
||||
mask);
|
||||
}
|
||||
|
||||
/**
|
||||
* \deprecated Use Vc::array or Vc::vector subscripting instead.
|
||||
*
|
||||
* \param array A pointer into memory (without alignment restrictions).
|
||||
 * \param ptrMember1 If \p array points to a struct, \p ptrMember1 determines the member in the
 *                   struct to be read. Thus the offsets in \p outerIndexes are relative to \p array
 *                   and not to the size of the gathered type (i.e. array[i].*ptrMember1 is accessed
 *                   instead of (&(array->*ptrMember1))[i]).
 * \param outerIndexes Determines the offsets into \p array where the values are gathered from.
 * \param innerIndexes Determines the offsets into the arrays pointed to by \p ptrMember1.
|
||||
*/
|
||||
template <typename S1, typename IT1, typename IT2>
|
||||
Vc_DEPRECATED(
|
||||
"use the subscript operator to Vc::array or Vc::vector "
|
||||
"instead.") inline Vc_CURRENT_CLASS_NAME(const S1 *array,
|
||||
const EntryType *const S1::*ptrMember1,
|
||||
IT1 outerIndexes, IT2 innerIndexes)
|
||||
{
|
||||
gather(Common::SubscriptOperation<const S1, IT1, std::ratio<1, 1>, true>(
|
||||
array, outerIndexes)[ptrMember1][innerIndexes]
|
||||
.gatherArguments());
|
||||
}
|
||||
|
||||
/**
|
||||
* \deprecated Use Vc::array or Vc::vector subscripting instead.
|
||||
*
|
||||
* \param array A pointer into memory (without alignment restrictions).
|
||||
 * \param ptrMember1 If \p array points to a struct, \p ptrMember1 determines the member in the
 *                   struct to be read. Thus the offsets in \p outerIndexes are relative to \p array
 *                   and not to the size of the gathered type (i.e. array[i].*ptrMember1 is accessed
 *                   instead of (&(array->*ptrMember1))[i]).
 * \param outerIndexes Determines the offsets into \p array where the values are gathered from.
 * \param innerIndexes Determines the offsets into the arrays pointed to by \p ptrMember1.
|
||||
* \param mask If a mask is given only the active entries will be gathered/scattered.
|
||||
*/
|
||||
template <typename S1, typename IT1, typename IT2>
|
||||
Vc_DEPRECATED(
|
||||
"use the subscript operator to Vc::array or Vc::vector "
|
||||
"instead.") inline Vc_CURRENT_CLASS_NAME(const S1 *array,
|
||||
const EntryType *const S1::*ptrMember1,
|
||||
IT1 outerIndexes, IT2 innerIndexes,
|
||||
MaskArgument mask)
|
||||
{
|
||||
gather(Common::SubscriptOperation<const S1, IT1, std::ratio<1, 1>, true>(
|
||||
array, outerIndexes)[ptrMember1][innerIndexes]
|
||||
.gatherArguments(),
|
||||
mask);
|
||||
}
|
||||
|
||||
/**
|
||||
* \deprecated Use Vc::array or Vc::vector subscripting instead.
|
||||
*
|
||||
* \param array A pointer into memory (without alignment restrictions).
|
||||
* \param member1 If \p array points to a struct, \p member1 determines the member in the struct to
|
||||
* be read. Thus the offsets in \p indexes are relative to the \p array and not to
|
||||
* the size of the gathered type (i.e. array[i].*member1 is accessed instead of
|
||||
* (&(array->*member1))[i])
|
||||
* \param indexes Determines the offsets into \p array where the values are gathered from/scattered
|
||||
* to. The type of indexes can either be an integer vector or a type that supports
|
||||
* operator[] access.
|
||||
*/
|
||||
template <typename S1, typename IT>
|
||||
Vc_DEPRECATED("use the subscript operator to Vc::array or Vc::vector "
|
||||
"instead.") inline void gather(const S1 *array,
|
||||
const EntryType S1::*member1, IT indexes)
|
||||
{
|
||||
gather(Common::SubscriptOperation<const S1, IT, std::ratio<1, 1>, true>(
|
||||
array, indexes)[member1]
|
||||
.gatherArguments());
|
||||
}
|
||||
|
||||
/**
|
||||
* \deprecated Use Vc::array or Vc::vector subscripting instead.
|
||||
*
|
||||
* \param array A pointer into memory (without alignment restrictions).
|
||||
* \param member1 If \p array points to a struct, \p member1 determines the member in the struct to
|
||||
* be read. Thus the offsets in \p indexes are relative to the \p array and not to
|
||||
* the size of the gathered type (i.e. array[i].*member1 is accessed instead of
|
||||
* (&(array->*member1))[i])
|
||||
* \param indexes Determines the offsets into \p array where the values are gathered from/scattered
|
||||
* to. The type of indexes can either be an integer vector or a type that supports
|
||||
* operator[] access.
|
||||
* \param mask If a mask is given only the active entries will be gathered/scattered.
|
||||
*/
|
||||
template <typename S1, typename IT>
|
||||
Vc_DEPRECATED("use the subscript operator to Vc::array or Vc::vector "
|
||||
"instead.") inline void gather(const S1 *array,
|
||||
const EntryType S1::*member1,
|
||||
IT indexes,
|
||||
MaskArgument mask)
|
||||
{
|
||||
gather(Common::SubscriptOperation<const S1, IT, std::ratio<1, 1>, true>(
|
||||
array, indexes)[member1]
|
||||
.gatherArguments(),
|
||||
mask);
|
||||
}
|
||||
|
||||
/**
|
||||
* \deprecated Use Vc::array or Vc::vector subscripting instead.
|
||||
*
|
||||
* \param array A pointer into memory (without alignment restrictions).
|
||||
* \param member1 If \p array points to a struct, \p member1 determines the member in the struct to
|
||||
* be read. Thus the offsets in \p indexes are relative to the \p array and not to
|
||||
* the size of the gathered type (i.e. array[i].*member1 is accessed instead of
|
||||
* (&(array->*member1))[i])
|
||||
* \param member2 If \p member1 is a struct then \p member2 selects the member to be read from that
|
||||
* struct (i.e. array[i].*member1.*member2 is read).
|
||||
* \param indexes Determines the offsets into \p array where the values are gathered from/scattered
|
||||
* to. The type of indexes can either be an integer vector or a type that supports
|
||||
* operator[] access.
|
||||
*/
|
||||
template <typename S1, typename S2, typename IT>
|
||||
Vc_DEPRECATED("use the subscript operator to Vc::array or Vc::vector "
|
||||
"instead.") inline void gather(const S1 *array, const S2 S1::*member1,
|
||||
const EntryType S2::*member2, IT indexes)
|
||||
{
|
||||
gather(Common::SubscriptOperation<const S1, IT, std::ratio<1, 1>, true>(
|
||||
array, indexes)[member1][member2]
|
||||
.gatherArguments());
|
||||
}
|
||||
|
||||
/**
|
||||
* \deprecated Use Vc::array or Vc::vector subscripting instead.
|
||||
*
|
||||
* \param array A pointer into memory (without alignment restrictions).
|
||||
* \param member1 If \p array points to a struct, \p member1 determines the member in the struct to
|
||||
* be read. Thus the offsets in \p indexes are relative to the \p array and not to
|
||||
* the size of the gathered type (i.e. array[i].*member1 is accessed instead of
|
||||
* (&(array->*member1))[i])
|
||||
* \param member2 If \p member1 is a struct then \p member2 selects the member to be read from that
|
||||
* struct (i.e. array[i].*member1.*member2 is read).
|
||||
* \param indexes Determines the offsets into \p array where the values are gathered from/scattered
|
||||
* to. The type of indexes can either be an integer vector or a type that supports
|
||||
* operator[] access.
|
||||
* \param mask If a mask is given only the active entries will be gathered/scattered.
|
||||
*/
|
||||
template <typename S1, typename S2, typename IT>
|
||||
Vc_DEPRECATED("use the subscript operator to Vc::array or Vc::vector "
|
||||
"instead.") inline void gather(const S1 *array, const S2 S1::*member1,
|
||||
const EntryType S2::*member2, IT indexes,
|
||||
MaskArgument mask)
|
||||
{
|
||||
gather(Common::SubscriptOperation<const S1, IT, std::ratio<1, 1>, true>(
|
||||
array, indexes)[member1][member2]
|
||||
.gatherArguments(),
|
||||
mask);
|
||||
}
|
||||
|
||||
/**
|
||||
* \deprecated Use Vc::array or Vc::vector subscripting instead.
|
||||
*
|
||||
* \param array A pointer into memory (without alignment restrictions).
|
||||
 * \param ptrMember1 If \p array points to a struct, \p ptrMember1 determines the member in the
 *                   struct to be read. Thus the offsets in \p outerIndexes are relative to \p array
 *                   and not to the size of the gathered type (i.e. array[i].*ptrMember1 is accessed
 *                   instead of (&(array->*ptrMember1))[i]).
 * \param outerIndexes Determines the offsets into \p array where the values are gathered from.
 * \param innerIndexes Determines the offsets into the arrays pointed to by \p ptrMember1.
|
||||
*/
|
||||
template <typename S1, typename IT1, typename IT2>
|
||||
Vc_DEPRECATED("use the subscript operator to Vc::array or Vc::vector "
|
||||
"instead.") inline void gather(const S1 *array,
|
||||
const EntryType *const S1::*ptrMember1,
|
||||
IT1 outerIndexes, IT2 innerIndexes)
|
||||
{
|
||||
gather(Common::SubscriptOperation<const S1, IT1, std::ratio<1, 1>, true>(
|
||||
array, outerIndexes)[ptrMember1][innerIndexes]
|
||||
.gatherArguments());
|
||||
}
|
||||
|
||||
/**
|
||||
* \deprecated Use Vc::array or Vc::vector subscripting instead.
|
||||
*
|
||||
* \param array A pointer into memory (without alignment restrictions).
|
||||
 * \param ptrMember1 If \p array points to a struct, \p ptrMember1 determines the member in the
 *                   struct to be read. Thus the offsets in \p outerIndexes are relative to \p array
 *                   and not to the size of the gathered type (i.e. array[i].*ptrMember1 is accessed
 *                   instead of (&(array->*ptrMember1))[i]).
 * \param outerIndexes Determines the offsets into \p array where the values are gathered from.
 * \param innerIndexes Determines the offsets into the arrays pointed to by \p ptrMember1.
|
||||
* \param mask If a mask is given only the active entries will be gathered/scattered.
|
||||
*/
|
||||
template <typename S1, typename IT1, typename IT2>
|
||||
Vc_DEPRECATED("use the subscript operator to Vc::array or Vc::vector "
|
||||
"instead.") inline void gather(const S1 *array,
|
||||
const EntryType *const S1::*ptrMember1,
|
||||
IT1 outerIndexes, IT2 innerIndexes,
|
||||
MaskArgument mask)
|
||||
{
|
||||
gather(Common::SubscriptOperation<const S1, IT1, std::ratio<1, 1>, true>(
|
||||
array, outerIndexes)[ptrMember1][innerIndexes]
|
||||
.gatherArguments(),
|
||||
mask);
|
||||
}
|
||||
///@}
|
|
@ -0,0 +1,61 @@
|
|||
/* This file is part of the Vc library. {{{
|
||||
Copyright © 2014-2015 Matthias Kretz <kretz@kde.org>
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the names of contributing organizations nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
}}}*/
|
||||
|
||||
public:
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// init to zero
|
||||
Vc_INTRINSIC Vector() = default;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// types
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// constants
|
||||
static constexpr std::size_t size() { return Size; }
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// constant Vectors
|
||||
explicit Vc_INTRINSIC_L Vector(VectorSpecialInitializerZero) Vc_INTRINSIC_R;
|
||||
explicit Vc_INTRINSIC_L Vector(VectorSpecialInitializerOne) Vc_INTRINSIC_R;
|
||||
explicit Vc_INTRINSIC_L Vector(VectorSpecialInitializerIndexesFromZero) Vc_INTRINSIC_R;
|
||||
static Vc_INTRINSIC Vc_CONST Vector Zero() { return Vector(Vc::Zero); }
|
||||
static Vc_INTRINSIC Vc_CONST Vector One() { return Vector(Vc::One); }
|
||||
static Vc_INTRINSIC Vc_CONST Vector IndexesFromZero()
|
||||
{
|
||||
return Vector(Vc::IndexesFromZero);
|
||||
}
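
// Illustrative values (assuming four entries per vector):
//   Vector::Zero()            == [0, 0, 0, 0]
//   Vector::One()             == [1, 1, 1, 1]
//   Vector::IndexesFromZero() == [0, 1, 2, 3]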
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// generator ctor
|
||||
template <class G, int = 0,
|
||||
class = typename std::enable_if<std::is_convertible<
|
||||
decltype(std::declval<G>()(size_t())), value_type>::value>::type>
|
||||
explicit Vector(G &&g) : Vector(generate(std::forward<G>(g)))
|
||||
{
|
||||
}
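
// Generator-ctor sketch (illustrative): each lane is initialized from its index:
//   float_v ramp([](size_t i) { return 0.5f * i; }); // ramp == [0.0, 0.5, 1.0, ...]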
|
||||
|
||||
// vim: foldmethod=marker
|
|
@ -0,0 +1,97 @@
|
|||
/* This file is part of the Vc library. {{{
|
||||
Copyright © 2012-2015 Matthias Kretz <kretz@kde.org>
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the names of contributing organizations nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
}}}*/
|
||||
|
||||
#ifndef VC_COMMON_IIF_H_
|
||||
#define VC_COMMON_IIF_H_
|
||||
|
||||
#include "../type_traits"
|
||||
#include "macros.h"
|
||||
|
||||
namespace Vc_VERSIONED_NAMESPACE
|
||||
{
|
||||
/**
|
||||
* \ingroup Utilities
|
||||
*
|
||||
* Function to mimic the ternary operator '?:' (inline-if).
|
||||
*
|
||||
 * \param condition Determines which values are returned. This is analogous to the first argument to
|
||||
* the ternary operator.
|
||||
* \param trueValue The values to return where \p condition is \c true.
|
||||
* \param falseValue The values to return where \p condition is \c false.
|
||||
* \return A combination of entries from \p trueValue and \p falseValue, according to \p condition.
|
||||
*
|
||||
* So instead of the scalar variant
|
||||
* \code
|
||||
* float x = a > 1.f ? b : b + c;
|
||||
* \endcode
|
||||
* you'd write
|
||||
* \code
|
||||
* float_v x = Vc::iif (a > 1.f, b, b + c);
|
||||
* \endcode
|
||||
*
|
||||
* Assuming \c a has the values [0, 3, 5, 1], \c b is [1, 1, 1, 1], and \c c is [1, 2, 3, 4], then x
|
||||
 * will be [2, 1, 1, 5].
|
||||
*/
|
||||
template <typename Mask, typename T>
|
||||
Vc_ALWAYS_INLINE enable_if<is_simd_mask<Mask>::value && is_simd_vector<T>::value, T> iif(
|
||||
const Mask &condition, const T &trueValue, const T &falseValue)
|
||||
{
|
||||
T result(falseValue);
|
||||
Vc::where(condition) | result = trueValue;
|
||||
return result;
|
||||
}
|
||||
|
||||
/**\internal
|
||||
* The following declaration makes it explicit that `iif (Mask, non-vector, non-vector)`
|
||||
* is not supposed to work. Doing the same thing with \c static_assert would break SFINAE.
|
||||
*/
|
||||
template <typename Mask, typename T>
|
||||
enable_if<is_simd_mask<Mask>::value && !is_simd_vector<T>::value, T> iif(
|
||||
const Mask &, const T &, const T &) = delete;
|
||||
|
||||
/**
|
||||
* \ingroup Utilities
|
||||
*
|
||||
* Overload of the above for boolean conditions.
|
||||
*
|
||||
* This typically results in direct use of the ternary operator. This function makes it easier to
|
||||
* switch from a Vc type to a builtin type.
|
||||
*
|
||||
 * \param condition Determines which value is returned. This is analogous to the first argument to
|
||||
* the ternary operator.
|
||||
* \param trueValue The value to return if \p condition is \c true.
|
||||
* \param falseValue The value to return if \p condition is \c false.
|
||||
* \return Either \p trueValue or \p falseValue, depending on \p condition.
|
||||
*/
|
||||
template<typename T> constexpr T iif (bool condition, const T &trueValue, const T &falseValue)
|
||||
{
|
||||
return condition ? trueValue : falseValue;
|
||||
}
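
// Scalar overload sketch (illustrative): behaves exactly like the ternary operator,
// so generic code can use iif for both SIMD and builtin types:
//   float x = Vc::iif (a > 1.f, b, b + c); // same as: a > 1.f ? b : b + c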
|
||||
|
||||
} // namespace Vc
|
||||
|
||||
#endif // VC_COMMON_IIF_H_
|
|
@ -0,0 +1,79 @@
|
|||
/* This file is part of the Vc library. {{{
|
||||
Copyright © 2014-2015 Matthias Kretz <kretz@kde.org>
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the names of contributing organizations nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
}}}*/
|
||||
|
||||
#ifndef VC_COMMON_INDEXSEQUENCE_H_
|
||||
#define VC_COMMON_INDEXSEQUENCE_H_
|
||||
|
||||
#include "../global.h"
|
||||
|
||||
namespace Vc_VERSIONED_NAMESPACE
|
||||
{
|
||||
/** \internal
|
||||
 * Helper class for a sequence of size_t values from 0 to N - 1. This type was
 * standardized in C++14 as std::index_sequence.
|
||||
*/
|
||||
template <std::size_t... I> struct index_sequence
|
||||
{
|
||||
static constexpr std::size_t size() noexcept { return sizeof...(I); }
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* This struct builds an index_sequence type from a given upper bound \p N.
|
||||
 * It does so recursively by concatenating two index sequences of length N/2.
|
||||
*/
|
||||
template <std::size_t N> struct make_index_sequence_impl {
|
||||
template <std::size_t Offset, std::size_t... Ns>
|
||||
static index_sequence<Ns..., (Ns + Offset)...> join(std::false_type,
|
||||
index_sequence<Ns...>);
|
||||
template <std::size_t Offset, std::size_t... Ns>
|
||||
static index_sequence<Ns..., Offset - 1, (Ns + Offset)...> join(
|
||||
std::true_type, index_sequence<Ns...>);
|
||||
|
||||
using is_odd = std::integral_constant<bool, N & 1>;
|
||||
using half = typename make_index_sequence_impl<N / 2>::type;
|
||||
using type = decltype(join<(N + 1) / 2>(is_odd(), half()));
|
||||
};
|
||||
template <> struct make_index_sequence_impl<0> {
|
||||
using type = index_sequence<>;
|
||||
};
|
||||
template <> struct make_index_sequence_impl<1> {
|
||||
using type = index_sequence<0>;
|
||||
};
|
||||
template <> struct make_index_sequence_impl<2> {
|
||||
using type = index_sequence<0, 1>;
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* Creates an index_sequence type for the upper bound \p N.
|
||||
*/
|
||||
template <std::size_t N>
|
||||
using make_index_sequence = typename make_index_sequence_impl<N>::type;
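
// Worked example of the recursive construction (N == 5; not part of the code):
//   half   = make_index_sequence_impl<2>::type        // index_sequence<0, 1>
//   is_odd = true, Offset = (5 + 1) / 2 = 3
//   join<3>(std::true_type(), index_sequence<0, 1>())
//          -> index_sequence<0, 1, 2, 3, 4>           // Ns..., Offset - 1, (Ns + Offset)...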
|
||||
}
|
||||
|
||||
#endif // VC_COMMON_INDEXSEQUENCE_H_
|
||||
|
||||
// vim: foldmethod=marker
|
|
@ -0,0 +1,63 @@
|
|||
/* This file is part of the Vc library. {{{
|
||||
Copyright © 2014-2015 Matthias Kretz <kretz@kde.org>
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the names of contributing organizations nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
}}}*/
|
||||
|
||||
#ifndef VC_COMMON_INTERLEAVE_H_
|
||||
#define VC_COMMON_INTERLEAVE_H_
|
||||
|
||||
#include "macros.h"
|
||||
|
||||
namespace Vc_VERSIONED_NAMESPACE
|
||||
{
|
||||
/** \ingroup Utilities
|
||||
Interleaves the entries from \p a and \p b into two vectors of the same type. The
returned vectors contain the elements in the order `a[0], b[0], a[1], b[1], a[2],
b[2], a[3], b[3], ...`.
|
||||
|
||||
Example:
|
||||
\code
|
||||
Vc::SimdArray<int, 4> a = { 1, 2, 3, 4 };
|
||||
Vc::SimdArray<int, 4> b = { 9, 8, 7, 6 };
|
||||
std::tie(a, b) = Vc::interleave(a, b);
|
||||
std::cout << a << b;
|
||||
// prints:
|
||||
// <1 9 2 8><3 7 4 6>
|
||||
\endcode
|
||||
|
||||
\param a input vector whose data will appear at even indexes in the output
|
||||
\param b input vector whose data will appear at odd indexes in the output
|
||||
\return two vectors with data from \p a and \p b interleaved
|
||||
*/
|
||||
template <typename V, typename = enable_if<Traits::is_simd_vector<V>::value>>
|
||||
std::pair<V, V> interleave(const V &a, const V &b)
|
||||
{
|
||||
return {a.interleaveLow(b), a.interleaveHigh(b)};
|
||||
}
|
||||
} // namespace Vc
|
||||
|
||||
#endif // VC_COMMON_INTERLEAVE_H_
|
||||
|
||||
// vim: foldmethod=marker
|
|
@ -0,0 +1,351 @@
|
|||
/* This file is part of the Vc library. {{{
|
||||
Copyright © 2012-2015 Matthias Kretz <kretz@kde.org>
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the names of contributing organizations nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
}}}*/
|
||||
|
||||
#ifndef VC_COMMON_INTERLEAVEDMEMORY_H_
|
||||
#define VC_COMMON_INTERLEAVEDMEMORY_H_
|
||||
|
||||
#include "macros.h"
|
||||
|
||||
namespace Vc_VERSIONED_NAMESPACE
|
||||
{
|
||||
namespace Common
|
||||
{
|
||||
/**
|
||||
* \internal
|
||||
*/
|
||||
template<typename V, typename I, bool Readonly> struct InterleavedMemoryAccessBase
|
||||
{
|
||||
// Partial specialization doesn't work for functions without partial specialization of the whole
|
||||
// class. Therefore we capture the contents of InterleavedMemoryAccessBase in a macro to easily
|
||||
// copy it into its specializations.
|
||||
typedef typename std::conditional<
|
||||
Readonly, typename std::add_const<typename V::EntryType>::type,
|
||||
typename V::EntryType>::type T;
|
||||
typedef typename V::AsArg VArg;
|
||||
typedef T Ta Vc_MAY_ALIAS;
|
||||
const I m_indexes;
|
||||
Ta *const m_data;
|
||||
|
||||
Vc_ALWAYS_INLINE InterleavedMemoryAccessBase(typename I::AsArg indexes, Ta *data)
|
||||
: m_indexes(indexes), m_data(data)
|
||||
{
|
||||
}
|
||||
|
||||
// implementations of the following are in {scalar,sse,avx}/detail.h
|
||||
template <typename... Vs> Vc_INTRINSIC void deinterleave(Vs &&... vs) const
|
||||
{
|
||||
Impl::deinterleave(m_data, m_indexes, std::forward<Vs>(vs)...);
|
||||
}
|
||||
|
||||
protected:
|
||||
using Impl = Vc::Detail::InterleaveImpl<V, V::Size, sizeof(V)>;
|
||||
|
||||
template <typename T, std::size_t... Indexes>
|
||||
Vc_INTRINSIC void callInterleave(T &&a, index_sequence<Indexes...>)
|
||||
{
|
||||
Impl::interleave(m_data, m_indexes, a[Indexes]...);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* \internal
|
||||
*/
|
||||
// delay execution of the deinterleaving gather until operator=
|
||||
template <size_t StructSize, typename V, typename I = typename V::IndexType,
|
||||
bool Readonly>
|
||||
struct InterleavedMemoryReadAccess : public InterleavedMemoryAccessBase<V, I, Readonly>
|
||||
{
|
||||
typedef InterleavedMemoryAccessBase<V, I, Readonly> Base;
|
||||
typedef typename Base::Ta Ta;
|
||||
|
||||
Vc_ALWAYS_INLINE InterleavedMemoryReadAccess(Ta *data, typename I::AsArg indexes)
|
||||
: Base(StructSize == 1u
|
||||
? indexes
|
||||
: StructSize == 2u
|
||||
? indexes << 1
|
||||
: StructSize == 4u
|
||||
? indexes << 2
|
||||
: StructSize == 8u
|
||||
? indexes << 3
|
||||
: StructSize == 16u ? indexes << 4
|
||||
: indexes * I(int(StructSize)),
|
||||
data)
|
||||
{
|
||||
}
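
// The shifts convert struct indexes into scalar offsets. E.g. for StructSize == 4
// (a struct of four floats), index i refers to m_data[4 * i], hence `indexes << 2`.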
|
||||
|
||||
template <typename T, std::size_t... Indexes>
|
||||
Vc_ALWAYS_INLINE T deinterleave_unpack(index_sequence<Indexes...>) const
|
||||
{
|
||||
T r;
|
||||
Base::Impl::deinterleave(this->m_data, this->m_indexes, std::get<Indexes>(r)...);
|
||||
return r;
|
||||
}
|
||||
|
||||
template <typename T,
|
||||
typename = enable_if<(std::is_default_constructible<T>::value &&
|
||||
std::is_same<V, Traits::decay<decltype(std::get<0>(
|
||||
std::declval<T &>()))>>::value)>>
|
||||
Vc_ALWAYS_INLINE operator T() const
|
||||
{
|
||||
return deinterleave_unpack<T>(make_index_sequence<std::tuple_size<T>::value>());
|
||||
}
|
||||
};
|
||||
|
||||
///\internal Runtime check (disabled when NDEBUG is defined) asserting that all indexes are unique.
|
||||
template<typename I> struct CheckIndexesUnique
|
||||
{
|
||||
#ifdef NDEBUG
|
||||
static Vc_INTRINSIC void test(const I &) {}
|
||||
#else
|
||||
static void test(const I &indexes)
|
||||
{
|
||||
const I test = indexes.sorted();
|
||||
Vc_ASSERT(I::Size == 1 || (test == test.rotated(1)).isEmpty())
|
||||
}
|
||||
#endif
|
||||
};
|
||||
///\internal For SuccessiveEntries the indexes are unique by construction; no check is needed.
|
||||
template<size_t S> struct CheckIndexesUnique<SuccessiveEntries<S> >
|
||||
{
|
||||
static Vc_INTRINSIC void test(const SuccessiveEntries<S> &) {}
|
||||
};
|
||||
|
||||
/**
|
||||
* \internal
|
||||
*/
|
||||
template <size_t StructSize, typename V, typename I = typename V::IndexType>
|
||||
struct InterleavedMemoryAccess : public InterleavedMemoryReadAccess<StructSize, V, I, false>
|
||||
{
|
||||
typedef InterleavedMemoryAccessBase<V, I, false> Base;
|
||||
typedef typename Base::Ta Ta;
|
||||
|
||||
Vc_ALWAYS_INLINE InterleavedMemoryAccess(Ta *data, typename I::AsArg indexes)
|
||||
: InterleavedMemoryReadAccess<StructSize, V, I, false>(data, indexes)
|
||||
{
|
||||
CheckIndexesUnique<I>::test(indexes);
|
||||
}
|
||||
|
||||
template <int N> Vc_ALWAYS_INLINE void operator=(VectorReferenceArray<N, V> &&rhs)
|
||||
{
|
||||
static_assert(N <= StructSize,
|
||||
"You_are_trying_to_scatter_more_data_into_the_struct_than_it_has");
|
||||
this->callInterleave(std::move(rhs), make_index_sequence<N>());
|
||||
}
|
||||
template <int N> Vc_ALWAYS_INLINE void operator=(VectorReferenceArray<N, const V> &&rhs)
|
||||
{
|
||||
static_assert(N <= StructSize,
|
||||
"You_are_trying_to_scatter_more_data_into_the_struct_than_it_has");
|
||||
this->callInterleave(std::move(rhs), make_index_sequence<N>());
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Wraps a pointer to memory with convenience functions to access it via vectors.
|
||||
*
|
||||
 * \tparam S The type of the struct.
 * \tparam V The type of the vector to be returned when read. This should reflect the type of the
 *           members inside the struct.
|
||||
*
|
||||
* \see operator[]
|
||||
* \ingroup Containers
|
||||
* \headerfile interleavedmemory.h <Vc/Memory>
|
||||
*/
|
||||
template<typename S, typename V> class InterleavedMemoryWrapper
|
||||
{
|
||||
typedef typename std::conditional<std::is_const<S>::value,
|
||||
const typename V::EntryType,
|
||||
typename V::EntryType>::type T;
|
||||
typedef typename V::IndexType I;
|
||||
typedef typename V::AsArg VArg;
|
||||
typedef const I &IndexType;
|
||||
static constexpr std::size_t StructSize = sizeof(S) / sizeof(T);
|
||||
using ReadAccess = InterleavedMemoryReadAccess<StructSize, V>;
|
||||
using Access =
|
||||
typename std::conditional<std::is_const<T>::value, ReadAccess,
|
||||
InterleavedMemoryAccess<StructSize, V>>::type;
|
||||
using ReadSuccessiveEntries =
|
||||
InterleavedMemoryReadAccess<StructSize, V, SuccessiveEntries<StructSize>>;
|
||||
using AccessSuccessiveEntries = typename std::conditional<
|
||||
std::is_const<T>::value, ReadSuccessiveEntries,
|
||||
InterleavedMemoryAccess<StructSize, V, SuccessiveEntries<StructSize>>>::type;
|
||||
typedef T Ta Vc_MAY_ALIAS;
|
||||
Ta *const m_data;
|
||||
|
||||
static_assert(StructSize * sizeof(T) == sizeof(S),
|
||||
"InterleavedMemoryAccess_does_not_support_packed_structs");
|
||||
|
||||
public:
|
||||
/**
|
||||
* Constructs the wrapper object.
|
||||
*
|
||||
* \param s A pointer to a C-array.
|
||||
*/
|
||||
Vc_ALWAYS_INLINE InterleavedMemoryWrapper(S *s)
|
||||
: m_data(reinterpret_cast<Ta *>(s))
|
||||
{
|
||||
}
|
||||
|
||||
/**
|
||||
* Interleaved scatter/gather access.
|
||||
*
|
||||
 * Assuming you have an array of structs of floats and a vector of \p indexes into that array, this function
|
||||
* can be used to access the struct entries as vectors using the minimal number of store or load
|
||||
* instructions.
|
||||
*
|
||||
* \param indexes Vector of indexes that determine the gather locations.
|
||||
*
|
||||
* \return A special (magic) object that executes the loads and deinterleave on assignment to a
|
||||
* vector tuple.
|
||||
*
|
||||
* Example:
|
||||
* \code
|
||||
* struct Foo {
|
||||
* float x, y, z;
|
||||
* };
|
||||
*
|
||||
* void fillWithBar(Foo *_data, uint_v indexes)
|
||||
* {
|
||||
* Vc::InterleavedMemoryWrapper<Foo, float_v> data(_data);
|
||||
* const float_v x = bar(1);
|
||||
* const float_v y = bar(2);
|
||||
* const float_v z = bar(3);
|
||||
* data[indexes] = (x, y, z);
|
||||
* // it's also possible to just store a subset at the front of the struct:
|
||||
* data[indexes] = (x, y);
|
||||
* // if you want to store a single entry, use scatter:
|
||||
* z.scatter(_data, &Foo::x, indexes);
|
||||
* }
|
||||
*
|
||||
* float_v normalizeStuff(Foo *_data, uint_v indexes)
|
||||
* {
|
||||
* Vc::InterleavedMemoryWrapper<Foo, float_v> data(_data);
|
||||
* float_v x, y, z;
|
||||
* (x, y, z) = data[indexes];
|
||||
* // it is also possible to just load a subset from the front of the struct:
|
||||
* // (x, y) = data[indexes];
|
||||
* return Vc::sqrt(x * x + y * y + z * z);
|
||||
* }
|
||||
* \endcode
|
||||
*
|
||||
* You may think of the gather operation (or scatter as the inverse) like this:
|
||||
\verbatim
|
||||
Memory: {x0 y0 z0 x1 y1 z1 x2 y2 z2 x3 y3 z3 x4 y4 z4 x5 y5 z5 x6 y6 z6 x7 y7 z7 x8 y8 z8}
|
||||
indexes: [5, 0, 1, 7]
|
||||
Result in (x, y, z): ({x5 x0 x1 x7}, {y5 y0 y1 y7}, {z5 z0 z1 z7})
|
||||
\endverbatim
|
||||
*
|
||||
* \warning If \p indexes contains non-unique entries on scatter, the result is undefined. If
|
||||
* \c NDEBUG is not defined the implementation will assert that the \p indexes entries are unique.
|
||||
*/
|
||||
template <typename IT>
|
||||
Vc_ALWAYS_INLINE enable_if<!std::is_convertible<IT, size_t>::value &&
|
||||
std::is_convertible<IT, IndexType>::value &&
|
||||
!std::is_const<S>::value,
|
||||
Access>
|
||||
operator[](IT indexes)
|
||||
{
|
||||
return Access(m_data, indexes);
|
||||
}
|
||||
|
||||
/// const overload (gathers only) of the above function
|
||||
Vc_ALWAYS_INLINE ReadAccess operator[](IndexType indexes) const
|
||||
{
|
||||
return ReadAccess(m_data, indexes);
|
||||
}
|
||||
|
||||
/// alias of the above function
|
||||
Vc_ALWAYS_INLINE ReadAccess gather(IndexType indexes) const { return operator[](indexes); }
|
||||
|
||||
/**
|
||||
* Interleaved access.
|
||||
*
|
||||
* This function is an optimization of the function above, for cases where the index vector
|
||||
* contains consecutive values. It will load \p V::Size consecutive entries from memory and
|
||||
* deinterleave them into Vc vectors.
|
||||
*
|
||||
 * \param first The first of \p V::Size indexes to be accessed.
|
||||
*
|
||||
* \return A special (magic) object that executes the loads and deinterleave on assignment to a
|
||||
* vector tuple.
|
||||
*
|
||||
* Example:
|
||||
* \code
|
||||
* struct Foo {
|
||||
* float x, y, z;
|
||||
* };
|
||||
*
|
||||
* void foo(Foo *_data)
|
||||
* {
|
||||
* Vc::InterleavedMemoryWrapper<Foo, float_v> data(_data);
|
||||
* for (size_t i = 0; i < 32U; i += float_v::Size) {
|
||||
* float_v x, y, z;
|
||||
* (x, y, z) = data[i];
|
||||
* // now:
|
||||
* // x = { _data[i].x, _data[i + 1].x, _data[i + 2].x, ... }
|
||||
* // y = { _data[i].y, _data[i + 1].y, _data[i + 2].y, ... }
|
||||
* // z = { _data[i].z, _data[i + 1].z, _data[i + 2].z, ... }
|
||||
* ...
|
||||
* }
|
||||
* }
|
||||
* \endcode
|
||||
*/
|
||||
Vc_ALWAYS_INLINE ReadSuccessiveEntries operator[](size_t first) const
|
||||
{
|
||||
return ReadSuccessiveEntries(m_data, first);
|
||||
}
|
||||
|
||||
Vc_ALWAYS_INLINE AccessSuccessiveEntries operator[](size_t first)
|
||||
{
|
||||
return AccessSuccessiveEntries(m_data, first);
|
||||
}
|
||||
|
||||
//Vc_ALWAYS_INLINE Access scatter(I indexes, VArg v0, VArg v1);
|
||||
};
|
||||
} // namespace Common
|
||||
|
||||
using Common::InterleavedMemoryWrapper;
|
||||
|
||||
/**
|
||||
 * Creates an adapter around a given array of structures (AoS) that enables optimized loads
|
||||
* + deinterleaving operations / interleaving operations + stores for vector access (using
|
||||
* \p V).
|
||||
*
|
||||
* \tparam V The `Vc::Vector<T>` type to use per element of the structure.
|
||||
* \param s A pointer to an array of structures containing data members of type `T`.
|
||||
*
|
||||
* \see Vc::Common::InterleavedMemoryWrapper
|
||||
*
|
||||
* \todo Support destructuring via structured bindings.
|
||||
*/
|
||||
template <typename V, typename S>
|
||||
inline Common::InterleavedMemoryWrapper<S, V> make_interleave_wrapper(S *s)
|
||||
{
|
||||
return Common::InterleavedMemoryWrapper<S, V>(s);
|
||||
}
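
// Usage sketch (illustrative; `Particle` and `N` are hypothetical):
//   struct Particle { float x, y, z; };
//   Particle particles[N];
//   auto wrapper = Vc::make_interleave_wrapper<float_v>(&particles[0]);
//   float_v x, y, z;
//   (x, y, z) = wrapper[0]; // deinterleave the first float_v::Size structs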
|
||||
} // namespace Vc
|
||||
|
||||
#endif // VC_COMMON_INTERLEAVEDMEMORY_H_
|
|
@ -0,0 +1,282 @@
|
|||
/* This file is part of the Vc library. {{{
|
||||
Copyright © 2013-2015 Matthias Kretz <kretz@kde.org>
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the names of contributing organizations nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
}}}*/
|
||||
|
||||
#ifndef VC_COMMON_ITERATORS_H_
|
||||
#define VC_COMMON_ITERATORS_H_
|
||||
|
||||
#include <array>
|
||||
#include <iterator>
|
||||
#ifdef Vc_MSVC
|
||||
#include <intrin.h> // for _BitScanForward
|
||||
#endif // Vc_MSVC
|
||||
#include "where.h"
|
||||
#include "elementreference.h"
|
||||
#include "macros.h"
|
||||
|
||||
namespace Vc_VERSIONED_NAMESPACE
|
||||
{
|
||||
namespace Common
|
||||
{
|
||||
|
||||
template<typename _V, typename Flags> class MemoryVector;
|
||||
template<typename _V, typename Flags> class MemoryVectorIterator;
|
||||
|
||||
template <typename V> class Iterator;
|
||||
template <typename V, bool> class IteratorBase;
|
||||
template <typename V> class IteratorBase<V, true>
|
||||
{
|
||||
public:
|
||||
using iterator_category = std::input_iterator_tag;
|
||||
using value_type = typename V::value_type;
|
||||
using difference_type = int;
|
||||
using reference = value_type;
|
||||
Vc_ALWAYS_INLINE reference operator*() const { return v()[i()]; }
|
||||
Vc_ALWAYS_INLINE reference operator[](difference_type i2) const { return v()[i2]; }
|
||||
|
||||
private:
|
||||
Vc_INTRINSIC V &v() const { return *static_cast<const Iterator<V> *>(this)->v; }
|
||||
Vc_INTRINSIC difference_type i() const
|
||||
{
|
||||
return static_cast<const Iterator<V> *>(this)->i;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename V> class IteratorBase<V, false>
|
||||
{
|
||||
public:
|
||||
using iterator_category = std::input_iterator_tag;
|
||||
using value_type = typename V::value_type;
|
||||
using difference_type = int;
|
||||
using reference = Vc::Detail::ElementReference<V, IteratorBase>;
|
||||
Vc_ALWAYS_INLINE reference operator*() const { return {*v(), i()}; }
|
||||
Vc_ALWAYS_INLINE reference operator[](difference_type i2) const { return {*v(), i2}; }
|
||||
|
||||
private:
|
||||
Vc_INTRINSIC V *v() const { return static_cast<const Iterator<V> *>(this)->v; }
|
||||
Vc_INTRINSIC difference_type i() const
|
||||
{
|
||||
return static_cast<const Iterator<V> *>(this)->i;
|
||||
}
|
||||
|
||||
friend reference;
|
||||
static Vc_INTRINSIC value_type get(const V &o, int i)
|
||||
{
|
||||
return o[i];
|
||||
}
|
||||
template <typename T> static Vc_INTRINSIC void set(V &o, int i, T &&v)
|
||||
{
|
||||
o[i] = std::forward<T>(v);
|
||||
}
|
||||
};
|
||||
|
||||
// class Iterator {{{
|
||||
template <typename V> class Iterator : public IteratorBase<V, std::is_const<V>::value>
|
||||
{
|
||||
using Base = IteratorBase<V, std::is_const<V>::value>;
|
||||
friend Base;
|
||||
|
||||
public:
|
||||
using typename Base::iterator_category;
|
||||
using typename Base::value_type;
|
||||
using typename Base::difference_type;
|
||||
using pointer = const Iterator *;
|
||||
using typename Base::reference;
|
||||
|
||||
constexpr Iterator() = default;
|
||||
constexpr Iterator(V &_v, difference_type _i) : v(&_v), i(_i) {}
|
||||
// rely on implicit copy constructor/assignment
|
||||
|
||||
Vc_ALWAYS_INLINE pointer operator->() const { return this; }
|
||||
using Base::operator*;
|
||||
|
||||
    Vc_ALWAYS_INLINE Iterator &operator++() { ++i; return *this; }
    Vc_ALWAYS_INLINE Iterator operator++(int) { Iterator tmp = *this; ++i; return tmp; }

    // bidirectional iteration is supported
    Vc_ALWAYS_INLINE Iterator &operator--() { --i; return *this; }
    Vc_ALWAYS_INLINE Iterator operator--(int) { Iterator tmp = *this; --i; return tmp; }

    // RandomAccessIterator:
    using Base::operator[];
    Vc_ALWAYS_INLINE Iterator &operator+=(difference_type d) { i += d; return *this; }
    Vc_ALWAYS_INLINE Iterator &operator-=(difference_type d) { i -= d; return *this; }
    Vc_ALWAYS_INLINE Iterator operator+(difference_type d) const { return {*v, i + d}; }
    Vc_ALWAYS_INLINE Iterator operator-(difference_type d) const { return {*v, i - d}; }
    Vc_ALWAYS_INLINE difference_type operator-(const Iterator &rhs) const { return i - rhs.i; }
    friend Vc_ALWAYS_INLINE Iterator operator+(difference_type d, const Iterator &rhs)
    {
        return {*rhs.v, rhs.i + d};
    }

    // InputIterator would not need to test v == rhs.v, but except for `reference` this
    // class implements a complete RandomAccessIterator
    Vc_ALWAYS_INLINE bool operator==(const Iterator<V> &rhs) const { return v == rhs.v && i == rhs.i; }
    Vc_ALWAYS_INLINE bool operator!=(const Iterator<V> &rhs) const { return v == rhs.v && i != rhs.i; }
    Vc_ALWAYS_INLINE bool operator< (const Iterator<V> &rhs) const { return v == rhs.v && i <  rhs.i; }
    Vc_ALWAYS_INLINE bool operator<=(const Iterator<V> &rhs) const { return v == rhs.v && i <= rhs.i; }
    Vc_ALWAYS_INLINE bool operator> (const Iterator<V> &rhs) const { return v == rhs.v && i >  rhs.i; }
    Vc_ALWAYS_INLINE bool operator>=(const Iterator<V> &rhs) const { return v == rhs.v && i >= rhs.i; }

private:
    V *v = nullptr;
    difference_type i = 0;
};/*}}}*/

template <typename V> using ConstIterator = Iterator<const V>;

class BitmaskIterator/*{{{*/
{
#ifdef Vc_MSVC
    unsigned long mask;
    unsigned long bit;
#else
    size_t mask;
    size_t bit;
#endif

    void nextBit()
    {
#ifdef Vc_GNU_ASM
        bit = __builtin_ctzl(mask);
#elif defined(Vc_MSVC)
        _BitScanForward(&bit, mask);
#else
#error "Not implemented yet. Please contact vc-devel@compeng.uni-frankfurt.de"
#endif
    }
    void resetLsb()
    {
        // 01100100 - 1 = 01100011
        mask &= (mask - 1);
/*
#ifdef Vc_GNU_ASM
        __asm__("btr %1,%0" : "+r"(mask) : "r"(bit));
#elif defined(_WIN64)
        _bittestandreset64(&mask, bit);
#elif defined(_WIN32)
        _bittestandreset(&mask, bit);
#else
#error "Not implemented yet. Please contact vc-devel@compeng.uni-frankfurt.de"
#endif
*/
    }
public:
    BitmaskIterator(decltype(mask) m) : mask(m) { nextBit(); }
    BitmaskIterator(const BitmaskIterator &) = default;
    BitmaskIterator(BitmaskIterator &&) = default;

    Vc_ALWAYS_INLINE size_t operator->() const { return bit; }
    Vc_ALWAYS_INLINE size_t operator*() const { return bit; }

    Vc_ALWAYS_INLINE BitmaskIterator &operator++() { resetLsb(); nextBit(); return *this; }
    Vc_ALWAYS_INLINE BitmaskIterator operator++(int) { BitmaskIterator tmp = *this; resetLsb(); nextBit(); return tmp; }

    Vc_ALWAYS_INLINE bool operator==(const BitmaskIterator &rhs) const { return mask == rhs.mask; }
    Vc_ALWAYS_INLINE bool operator!=(const BitmaskIterator &rhs) const { return mask != rhs.mask; }
};/*}}}*/

template <typename T>
Vc_ALWAYS_INLINE
    enable_if<Traits::is_simd_vector<T>::value || Traits::is_simd_mask<T>::value,
              Iterator<typename std::remove_reference<T>::type>>
    begin(T &&x)
{
    return {std::forward<T>(x), 0};
}

template <typename T>
Vc_ALWAYS_INLINE
    enable_if<Traits::is_simd_vector<T>::value || Traits::is_simd_mask<T>::value,
              Iterator<typename std::remove_reference<T>::type>>
    end(T &&x)
{
    using TT = typename std::decay<T>::type;
    return {std::forward<T>(x), int(TT::size())};
}

template <typename T>
Vc_ALWAYS_INLINE enable_if<
    Traits::is_simd_mask<T>::value || Traits::is_simd_vector<T>::value, ConstIterator<T>>
cbegin(const T &v)
{
    return {v, 0};
}

template <typename T>
Vc_ALWAYS_INLINE enable_if<
    Traits::is_simd_mask<T>::value || Traits::is_simd_vector<T>::value, ConstIterator<T>>
cend(const T &v)
{
    return {v, int(T::size())};
}

template<typename M> Vc_ALWAYS_INLINE BitmaskIterator begin(const WhereImpl::WhereMask<M> &w)
{
    return w.mask.toInt();
}

template<typename M> Vc_ALWAYS_INLINE BitmaskIterator end(const WhereImpl::WhereMask<M> &)
{
    return 0;
}
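
/* The two overloads above make where() expressions iterable: the range visits the
 * index of each set mask bit via BitmaskIterator. A minimal sketch (assumes float_v
 * and where() from this Vc build):
 * \code
 * Vc::float_v x = Vc::float_v::Random() - 0.5f;
 * for (size_t i : Vc::where(x < 0.f)) {
 *     // i is the index of a vector lane for which the mask is true
 * }
 * \endcode
 */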

template<typename V, typename Flags, typename T> Vc_ALWAYS_INLINE MemoryVectorIterator<V, Flags>
    makeIterator(T *mem, Flags)
{
    return new(mem) MemoryVector<V, Flags>;
}

template<typename V, typename Flags, typename T> Vc_ALWAYS_INLINE MemoryVectorIterator<const V, Flags>
    makeIterator(const T *mem, Flags)
{
    return new(const_cast<T *>(mem)) MemoryVector<const V, Flags>;
}

template<typename V, typename Flags, typename FlagsX> Vc_ALWAYS_INLINE MemoryVectorIterator<V, Flags>
    makeIterator(MemoryVector<V, FlagsX> &mv, Flags)
{
    return new(&mv) MemoryVector<V, Flags>;
}

template<typename V, typename Flags, typename FlagsX> Vc_ALWAYS_INLINE MemoryVectorIterator<const V, Flags>
    makeIterator(MemoryVector<const V, FlagsX> &mv, Flags)
{
    return new(&mv) MemoryVector<const V, Flags>;
}

} // namespace Common

using Common::begin;
using Common::end;
using Common::cbegin;
using Common::cend;
using Common::makeIterator;
} // namespace Vc

#endif // VC_COMMON_ITERATORS_H_

// vim: foldmethod=marker

@ -0,0 +1,105 @@
/* This file is part of the Vc library. {{{
Copyright © 2014-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

// load ctors{{{1
/**
 * Construct a vector from loading its entries from the array at \p mem.
 *
 * \param mem A pointer to data. The pointer need not be aligned on a
 *            MemoryAlignment boundary unless you add the Vc::Aligned flag as a second
 *            argument.
 */
explicit Vc_INTRINSIC Vector(const EntryType *mem)
{
    load(mem);
}
/**
 * Construct a vector from loading its entries from the array at \p mem.
 *
 * \param mem A pointer to data. If \p flags contains the Vc::Aligned flag, the pointer
 *            must be aligned on a MemoryAlignment boundary.
 * \param flags A (combination of) flag object(s), such as Vc::Aligned, Vc::Streaming,
 *              Vc::Unaligned, and/or Vc::PrefetchDefault.
 */
template <typename Flags, typename = enable_if<Traits::is_load_store_flag<Flags>::value>>
explicit Vc_INTRINSIC Vector(const EntryType *mem, Flags flags)
{
    load(mem, flags);
}

template <typename U, typename Flags = DefaultLoadTag,
          typename = enable_if<
              (!std::is_integral<U>::value || !std::is_integral<EntryType>::value ||
               sizeof(EntryType) >= sizeof(U)) &&
              std::is_arithmetic<U>::value && Traits::is_load_store_flag<Flags>::value>>
explicit Vc_INTRINSIC Vector(const U *x, Flags flags = Flags())
{
    load<U, Flags>(x, flags);
}
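
/* Usage sketch for the three constructors above (the buffers are hypothetical; data
 * need not be aligned, alignedData must be aligned on a MemoryAlignment boundary):
 * \code
 * float data[Vc::float_v::Size] = {};
 * alignas(Vc::float_v::MemoryAlignment) float alignedData[Vc::float_v::Size] = {};
 * unsigned char bytes[Vc::float_v::Size] = {};
 * Vc::float_v a(data);                     // unaligned load (DefaultLoadTag)
 * Vc::float_v b(alignedData, Vc::Aligned); // aligned load
 * Vc::float_v c(bytes);                    // converting load (U = unsigned char)
 * \endcode
 */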

// load member functions{{{1
/**
 * Load the vector entries from \p mem, overwriting the previous values.
 *
 * \param mem
 * A pointer to data. The pointer need not be aligned on a MemoryAlignment boundary unless
 * you add the Vc::Aligned flag as a second argument.
 */
Vc_INTRINSIC void load(const EntryType *mem)
{
    load(mem, DefaultLoadTag());
}
/**
 * Load the vector entries from \p mem, overwriting the previous values.
 *
 * \param mem
 * A pointer to data. If \p flags contains the Vc::Aligned flag, the pointer must be
 * aligned on a MemoryAlignment boundary.
 * \param flags
 * A (combination of) flag object(s), such as Vc::Aligned, Vc::Streaming, Vc::Unaligned,
 * and/or Vc::PrefetchDefault.
 */
template <typename Flags>
Vc_INTRINSIC enable_if<Traits::is_load_store_flag<Flags>::value, void>
load(const EntryType *mem, Flags flags)
{
    load<EntryType, Flags>(mem, flags);
}
private:
template <typename U, typename Flags>
struct load_concept : public std::enable_if<
                          (!std::is_integral<U>::value || !std::is_integral<EntryType>::value ||
                           sizeof(EntryType) >= sizeof(U)) &&
                          std::is_arithmetic<U>::value && Traits::is_load_store_flag<Flags>::value, void>
{};

public:
template <typename U, typename Flags = DefaultLoadTag>
Vc_INTRINSIC_L typename load_concept<U, Flags>::type load(const U *mem, Flags = Flags()) Vc_INTRINSIC_R;
//}}}1

// vim: foldmethod=marker

@ -0,0 +1,243 @@
/* This file is part of the Vc library. {{{
Copyright © 2013-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_COMMON_LOADSTOREFLAGS_H_
#define VC_COMMON_LOADSTOREFLAGS_H_

#include "../traits/type_traits.h"

namespace Vc_VERSIONED_NAMESPACE
{

/**
 * Hint for \ref Prefetch to select prefetches that mark the memory as exclusive.
 *
 * This hint may optimize the prefetch if the memory will subsequently be written to.
 */
struct Exclusive {};
/**
 * Hint for \ref Prefetch to select prefetches that mark the memory as shared.
 */
struct Shared {};

namespace LoadStoreFlags
{

struct StreamingFlag {};
struct UnalignedFlag {};
struct PrefetchFlagBase {};
// TODO: determine a good default for typical CPU use
template <size_t L1 = 16 * 64, size_t L2 = 128 * 64, typename ExclusiveOrShared_ = void>
struct PrefetchFlag : public PrefetchFlagBase {
    typedef ExclusiveOrShared_ ExclusiveOrShared;
    static constexpr size_t L1Stride = L1;
    static constexpr size_t L2Stride = L2;
    static constexpr bool IsExclusive = std::is_same<ExclusiveOrShared, Exclusive>::value;
    static constexpr bool IsShared = std::is_same<ExclusiveOrShared, Shared>::value;
};

template<typename Base, typename Default, typename... LoadStoreFlags> struct ExtractType
{
    typedef Default type;
};
template<typename Base, typename Default, typename T, typename... LoadStoreFlags> struct ExtractType<Base, Default, T, LoadStoreFlags...>
{
    typedef typename std::conditional<std::is_base_of<Base, T>::value, T, typename ExtractType<Base, Default, LoadStoreFlags...>::type>::type type;
};

// ICC warns about the constexpr members in LoadStoreFlags: member "LoadStoreFlags<Flags...>::IsAligned" was declared but never referenced
// who needs that warning, especially if it was referenced...
// The warning cannot be re-enabled because it gets emitted whenever the LoadStoreFlags is instantiated
// somewhere, so it could be anywhere.
#ifdef Vc_ICC
#pragma warning(disable: 177)
#endif
/**\internal
 * Implementation of the load/store flags mechanism. This is internal API. Only some
 * concrete aliases are API-relevant types.
 */
template<typename... Flags> struct LoadStoreFlags
{
private:
    // ICC doesn't grok this line:
    //template<typename Test> using TestFlag = std::is_same<typename ExtractType<StreamingFlag, void, Flags...>::type, void>;
    typedef typename ExtractType<PrefetchFlagBase, PrefetchFlag<0, 0>, Flags...>::type Prefetch;

public:
    constexpr LoadStoreFlags() {}

    static constexpr bool IsStreaming = !std::is_same<typename ExtractType<StreamingFlag, void, Flags...>::type, void>::value;
    static constexpr bool IsUnaligned = !std::is_same<typename ExtractType<UnalignedFlag, void, Flags...>::type, void>::value;
    static constexpr bool IsAligned = !IsUnaligned;
    static constexpr bool IsPrefetch = !std::is_same<typename ExtractType<PrefetchFlagBase, void, Flags...>::type, void>::value;
    static constexpr bool IsExclusivePrefetch = Prefetch::IsExclusive;
    static constexpr bool IsSharedPrefetch = Prefetch::IsShared;
    static constexpr size_t L1Stride = Prefetch::L1Stride;
    static constexpr size_t L2Stride = Prefetch::L2Stride;

    typedef LoadStoreFlags<typename std::conditional<std::is_same<Flags, UnalignedFlag>::value, void, Flags>::type...> UnalignedRemoved;

    // The following EnableIf* convenience types cannot use enable_if because then no LoadStoreFlags type
    // could ever be instantiated. Instead these types are defined either as void* or void. The
    // function that does SFINAE then assigns "= nullptr" to this type. Thus, the ones with just
    // void result in substitution failure.
    typedef typename std::conditional<IsAligned && !IsStreaming, void *, void>::type EnableIfAligned;
    typedef typename std::conditional<IsAligned && IsStreaming, void *, void>::type EnableIfStreaming;
    typedef typename std::conditional<IsUnaligned && !IsStreaming, void *, void>::type EnableIfUnalignedNotStreaming;
    typedef typename std::conditional<IsUnaligned && IsStreaming, void *, void>::type EnableIfUnalignedAndStreaming;
    typedef typename std::conditional<IsUnaligned, void *, void>::type EnableIfUnaligned;
    typedef typename std::conditional<!IsUnaligned, void *, void>::type EnableIfNotUnaligned;
    typedef typename std::conditional<IsPrefetch, void *, void>::type EnableIfPrefetch;
    typedef typename std::conditional<!IsPrefetch, void *, void>::type EnableIfNotPrefetch;
};
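
/* A sketch of how the EnableIf* typedefs above enable overload selection (the store
 * overloads shown are hypothetical, not part of this header): the void* variant
 * accepts the "= nullptr" default argument, whereas the plain void variant is
 * ill-formed and drops the overload via SFINAE.
 * \code
 * template <typename Flags>
 * void store(float *mem, Flags, typename Flags::EnableIfAligned = nullptr);
 * template <typename Flags>
 * void store(float *mem, Flags, typename Flags::EnableIfUnaligned = nullptr);
 * \endcode
 */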

/**\internal
 * Specialization for no flags (i.e. aligned, non-streaming, no prefetching)
 */
template<> struct LoadStoreFlags<>
{
    constexpr LoadStoreFlags() {}

    static constexpr bool IsStreaming = false;
    static constexpr bool IsUnaligned = false;
    static constexpr bool IsAligned = !IsUnaligned;
    static constexpr bool IsPrefetch = false;
    static constexpr bool IsExclusivePrefetch = false;
    static constexpr bool IsSharedPrefetch = false;
    static constexpr size_t L1Stride = 0;
    static constexpr size_t L2Stride = 0;
    typedef void* EnableIfAligned;
    typedef void* EnableIfNotUnaligned;
    typedef void* EnableIfNotPrefetch;
};

/**
 * Operator for concatenation of LoadStoreFlags.
 *
 * Example:
 * \code
 * float_v x(mem, Vc::Aligned | Vc::Streaming);
 * \endcode
 */
template<typename... LFlags, typename... RFlags>
constexpr LoadStoreFlags<LFlags..., RFlags...> operator|(LoadStoreFlags<LFlags...>, LoadStoreFlags<RFlags...>)
{
    return LoadStoreFlags<LFlags..., RFlags...>();
}

} // namespace LoadStoreFlags

using LoadStoreFlags::PrefetchFlag;

typedef LoadStoreFlags::LoadStoreFlags<> AlignedTag;
typedef LoadStoreFlags::LoadStoreFlags<LoadStoreFlags::StreamingFlag> StreamingTag;
typedef LoadStoreFlags::LoadStoreFlags<LoadStoreFlags::UnalignedFlag> UnalignedTag;

/// The default load tag type uses unaligned (non-streaming) loads.
typedef UnalignedTag DefaultLoadTag;
/// The default store tag type uses unaligned (non-streaming) stores.
typedef UnalignedTag DefaultStoreTag;

/**\addtogroup Utilities
 * @{
 */
/**
 * Use this object for a \p flags parameter to request aligned loads and stores.
 *
 * It specifies that a load/store can expect a memory address that is aligned on
 * the correct boundary (i.e. \p MemoryAlignment).
 *
 * \warning
 * If you specify Aligned, but the memory address is not aligned, the program
 * will most likely crash.
 */
constexpr AlignedTag Aligned;

/**
 * Use this object for a \p flags parameter to request unaligned loads and stores.
 *
 * It specifies that a load/store can \em not expect a memory address that is
 * aligned on the correct boundary (i.e. the alignment may be less than
 * \p MemoryAlignment).
 *
 * \note
 * If you specify Unaligned, but the memory address is aligned, the load/store
 * will execute slightly slower than necessary.
 */
constexpr UnalignedTag Unaligned;

/**
 * Use this object for a \p flags parameter to request streaming loads and stores.
 *
 * It specifies that the cache should be bypassed for the given load/store.
 * Whether this will actually be done depends on the target system's capabilities.
 *
 * Streaming stores can be interesting when the code calculates values that, after being
 * written to memory, will not be used for a long time or will be used by a different thread.
 *
 * \note
 * Expect that most target systems do not support unaligned streaming loads or stores.
 * Therefore, make sure that you also specify Aligned.
 */
constexpr StreamingTag Streaming;

/**
 * Use this object for a \p flags parameter to request default software prefetches to be
 * emitted.
 */
constexpr LoadStoreFlags::LoadStoreFlags<PrefetchFlag<>> PrefetchDefault;
///@}

/**
 * \tparam L1 Prefetch stride in Bytes for prefetches into the L1 cache.
 * \tparam L2 Prefetch stride in Bytes for prefetches into the L2 cache.
 * \tparam ExclusiveOrShared Either Vc::Exclusive, Vc::Shared, or void (no hint).
 */
template <size_t L1 = PrefetchFlag<>::L1Stride,
          size_t L2 = PrefetchFlag<>::L2Stride,
          typename ExclusiveOrShared = PrefetchFlag<>::ExclusiveOrShared>
struct Prefetch : public LoadStoreFlags::LoadStoreFlags<PrefetchFlag<L1, L2, ExclusiveOrShared>>
{
};
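
/* Combining flags: Prefetch is itself a LoadStoreFlags type, so it composes with the
 * other flag objects via operator|. A sketch (assumes an aligned float pointer mem
 * from the surrounding program):
 * \code
 * Vc::float_v v(mem, Vc::Aligned | Vc::PrefetchDefault);
 * \endcode
 * Custom strides would use Vc::Prefetch<256, 1024>() in place of Vc::PrefetchDefault.
 */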

namespace Traits
{
///\internal partial specialization for detecting LoadStoreFlags types
template <typename... Ts>
struct is_loadstoreflag_internal<LoadStoreFlags::LoadStoreFlags<Ts...>> : public std::true_type
{
};
///\internal partial specialization for detecting the derived Prefetch type as a
/// load/store flag.
template <size_t L1, size_t L2, typename ExclusiveOrShared>
struct is_loadstoreflag_internal<Prefetch<L1, L2, ExclusiveOrShared>> : public std::true_type
{
};
} // namespace Traits
} // namespace Vc

#endif // VC_COMMON_LOADSTOREFLAGS_H_

@ -0,0 +1,276 @@
/* This file is part of the Vc library. {{{
Copyright © 2009-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

/* The log implementations are based on code from Julien Pommier which carries the following
   copyright information:
 */
/*
   Inspired by Intel Approximate Math library, and based on the
   corresponding algorithms of the cephes math library
*/
/* Copyright (C) 2007 Julien Pommier

   This software is provided 'as-is', without any express or implied
   warranty. In no event will the authors be held liable for any damages
   arising from the use of this software.

   Permission is granted to anyone to use this software for any purpose,
   including commercial applications, and to alter it and redistribute it
   freely, subject to the following restrictions:

   1. The origin of this software must not be misrepresented; you must not
      claim that you wrote the original software. If you use this software
      in a product, an acknowledgment in the product documentation would be
      appreciated but is not required.
   2. Altered source versions must be plainly marked as such, and must not be
      misrepresented as being the original software.
   3. This notice may not be removed or altered from any source distribution.

   (this is the zlib license)
*/

#ifdef Vc_COMMON_MATH_H_INTERNAL

enum LogarithmBase {
    BaseE, Base10, Base2
};

namespace Detail
{
template <typename T, typename Abi>
using Const = typename std::conditional<std::is_same<Abi, VectorAbi::Avx>::value,
                                        AVX::Const<T>, SSE::Const<T>>::type;

template<LogarithmBase Base>
struct LogImpl
{
    template<typename T, typename Abi> static Vc_ALWAYS_INLINE void log_series(Vector<T, Abi> &Vc_RESTRICT x, typename Vector<T, Abi>::AsArg exponent) {
        typedef Vector<T, Abi> V;
        typedef Detail::Const<T, Abi> C;
        // Taylor series around x = 2^exponent
        //   f(x)   = ln(x)   → exponent * ln(2) → C::ln2_small + C::ln2_large
        //   f'(x)  =    x⁻¹  →  x               → 1
        //   f''(x) = -  x⁻²  → -x² / 2          → C::_1_2()
        //          =  2!x⁻³  →  x³ / 3          → C::P(8)
        //          = -3!x⁻⁴  → -x⁴ / 4          → C::P(7)
        //          =  4!x⁻⁵  →  x⁵ / 5          → C::P(6)
        //   ...
        // The high order coefficients are adjusted to reduce the error that occurs from omission
        // of higher order terms.
        // P(0) is the smallest term and |x| < 1 ⇒ |xⁿ| > |xⁿ⁺¹|
        // The order of additions must go from smallest to largest terms
        const V x2 = x * x; // 0 → 4
#ifdef Vc_LOG_ILP
        V y2 = (C::P(6) * /*4 →  8*/ x2 + /* 8 → 11*/ C::P(7) * /*1 → 5*/ x) + /*11 → 14*/ C::P(8);
        V y0 = (C::P(0) * /*5 →  9*/ x2 + /* 9 → 12*/ C::P(1) * /*2 → 6*/ x) + /*12 → 15*/ C::P(2);
        V y1 = (C::P(3) * /*6 → 10*/ x2 + /*10 → 13*/ C::P(4) * /*3 → 7*/ x) + /*13 → 16*/ C::P(5);
        const V x3 = x2 * x;  //  7 → 11
        const V x6 = x3 * x3; // 11 → 15
        const V x9 = x6 * x3; // 15 → 19
        V y = (y0 * /*19 → 23*/ x9 + /*23 → 26*/ y1 * /*16 → 20*/ x6) + /*26 → 29*/ y2 * /*14 → 18*/ x3;
#elif defined Vc_LOG_ILP2
        /*
         *                           name start done
         *  movaps %xmm0, %xmm1    ; x     0     1
         *  movaps %xmm0, %xmm2    ; x     0     1
         *  mulps %xmm1, %xmm1     ; x2    1     5 *xmm1
         *  movaps <P8>, %xmm15    ; y8    1     2
         *  mulps %xmm1, %xmm2     ; x3    5     9 *xmm2
         *  movaps %xmm1, %xmm3    ; x2    5     6
         *  movaps %xmm1, %xmm4    ; x2    5     6
         *  mulps %xmm3, %xmm3     ; x4    6    10 *xmm3
         *  movaps %xmm2, %xmm5    ; x3    9    10
         *  movaps %xmm2, %xmm6    ; x3    9    10
         *  mulps %xmm2, %xmm4     ; x5    9    13 *xmm4
         *  movaps %xmm3, %xmm7    ; x4   10    11
         *  movaps %xmm3, %xmm8    ; x4   10    11
         *  movaps %xmm3, %xmm9    ; x4   10    11
         *  mulps %xmm5, %xmm5     ; x6   10    14 *xmm5
         *  mulps %xmm3, %xmm6     ; x7   11    15 *xmm6
         *  mulps %xmm7, %xmm7     ; x8   12    16 *xmm7
         *  movaps %xmm4, %xmm10   ; x5   13    14
         *  mulps %xmm4, %xmm8     ; x9   13    17 *xmm8
         *  mulps %xmm5, %xmm10    ; x11  14    18 *xmm10
         *  mulps %xmm5, %xmm9     ; x10  15    19 *xmm9
         *  mulps <P0>, %xmm10     ; y0   18    22
         *  mulps <P1>, %xmm9      ; y1   19    23
         *  mulps <P2>, %xmm8      ; y2   20    24
         *  mulps <P3>, %xmm7      ; y3   21    25
         *  addps %xmm10, %xmm9    ; y    23    26
         *  addps %xmm9, %xmm8     ; y    26    29
         *  addps %xmm8, %xmm7     ; y    29    32
         */
        const V x3 = x2 * x;   //  4 →  8
        const V x4 = x2 * x2;  //  5 →  9
        const V x5 = x2 * x3;  //  8 → 12
        const V x6 = x3 * x3;  //  9 → 13
        const V x7 = x4 * x3;
        const V x8 = x4 * x4;
        const V x9 = x5 * x4;
        const V x10 = x5 * x5;
        const V x11 = x5 * x6; // 13 → 17
        V y = C::P(0) * x11 + C::P(1) * x10 + C::P(2) * x9 + C::P(3) * x8 + C::P(4) * x7
            + C::P(5) * x6 + C::P(6) * x5 + C::P(7) * x4 + C::P(8) * x3;
#else
        V y = C::P(0);
        Vc::Common::unrolled_loop<int, 1, 9>([&](int i) { y = y * x + C::P(i); });
        y *= x * x2;
#endif
        switch (Base) {
        case BaseE:
            // ln(2) is split in two parts to increase precision (i.e. ln2_small + ln2_large = ln(2))
            y += exponent * C::ln2_small();
            y -= x2 * C::_1_2(); // [0, 0.25[
            x += y;
            x += exponent * C::ln2_large();
            break;
        case Base10:
            y += exponent * C::ln2_small();
            y -= x2 * C::_1_2(); // [0, 0.25[
            x += y;
            x += exponent * C::ln2_large();
            x *= C::log10_e();
            break;
        case Base2:
        {
            const V x_ = x;
            x *= C::log2_e();
            y *= C::log2_e();
            y -= x_ * x * C::_1_2(); // [0, 0.25[
            x += y;
            x += exponent;
            break;
        }
        }
    }

    template <typename Abi>
    static Vc_ALWAYS_INLINE void log_series(Vector<double, Abi> &Vc_RESTRICT x,
                                            typename Vector<double, Abi>::AsArg exponent)
    {
        typedef Vector<double, Abi> V;
        typedef Detail::Const<double, Abi> C;
        const V x2 = x * x;
        V y = C::P(0);
        V y2 = C::Q(0) + x;
        Vc::Common::unrolled_loop<int, 1, 5>([&](int i) {
            y = y * x + C::P(i);
            y2 = y2 * x + C::Q(i);
        });
        y2 = x / y2;
        y = y * x + C::P(5);
        y = x2 * y * y2;
        // TODO: refactor the following with the float implementation:
        switch (Base) {
        case BaseE:
            // ln(2) is split in two parts to increase precision (i.e. ln2_small + ln2_large = ln(2))
            y += exponent * C::ln2_small();
            y -= x2 * C::_1_2(); // [0, 0.25[
            x += y;
            x += exponent * C::ln2_large();
            break;
        case Base10:
            y += exponent * C::ln2_small();
            y -= x2 * C::_1_2(); // [0, 0.25[
            x += y;
            x += exponent * C::ln2_large();
            x *= C::log10_e();
            break;
        case Base2:
        {
            const V x_ = x;
            x *= C::log2_e();
            y *= C::log2_e();
            y -= x_ * x * C::_1_2(); // [0, 0.25[
            x += y;
            x += exponent;
            break;
        }
        }
    }

    template <typename T, typename Abi, typename V = Vector<T, Abi>>
    static inline Vector<T, Abi> calc(V _x)
    {
        typedef typename V::Mask M;
        typedef Detail::Const<T, Abi> C;

        V x(_x);

        const M invalidMask = x < V::Zero();
        const M infinityMask = x == V::Zero();
        const M denormal = x <= C::min();

        x(denormal) *= V(Vc::Detail::doubleConstant<1, 0, 54>()); // 2⁵⁴
        V exponent = Detail::exponent(x.data()); // = ⎣log₂(x)⎦
        exponent(denormal) -= 54;

        x.setZero(C::exponentMask()); // keep only the fractional part ⇒ x ∈ [1, 2[
        x = Detail::operator|(x,
                              C::_1_2()); // and set the exponent to 2⁻¹ ⇒ x ∈ [½, 1[

        // split calculation in two cases:
        // A: x ∈ [½, √½[
        // B: x ∈ [√½, 1[
        // √½ defines the point where Δe(x) := log₂(x) - ⎣log₂(x)⎦ = ½, i.e.
        //    log₂(√½) - ⎣log₂(√½)⎦ = ½ * -1 - ⎣½ * -1⎦ = -½ + 1 = ½

        const M smallX = x < C::_1_sqrt2();
        x(smallX) += x; // => x ∈ [√½, 1[ ∪ [1.5, 1 + √½[
        x -= V::One(); // => x ∈ [√½ - 1, 0[ ∪ [0.5, √½[
        exponent(!smallX) += V::One();

        log_series(x, exponent); // A: (ˣ⁄₂ᵉ - 1, e)  B: (ˣ⁄₂ᵉ⁺¹ - 1, e + 1)

        x.setQnan(invalidMask);        // x < 0 → NaN
        x(infinityMask) = C::neginf(); // x = 0 → -∞

        return x;
    }
};
} // namespace Detail

template <typename T, typename Abi>
Vc_INTRINSIC Vc_CONST Vector<T, detail::not_fixed_size_abi<Abi>> log(
    const Vector<T, Abi> &x)
{
    return Detail::LogImpl<BaseE>::calc<T, Abi>(x);
}
template <typename T, typename Abi>
Vc_INTRINSIC Vc_CONST Vector<T, detail::not_fixed_size_abi<Abi>> log10(
    const Vector<T, Abi> &x)
{
    return Detail::LogImpl<Base10>::calc<T, Abi>(x);
}
template <typename T, typename Abi>
Vc_INTRINSIC Vc_CONST Vector<T, detail::not_fixed_size_abi<Abi>> log2(
    const Vector<T, Abi> &x)
{
    return Detail::LogImpl<Base2>::calc<T, Abi>(x);
}
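
/* Usage sketch for the logarithm functions above (assumes float_v from this Vc build;
 * entries must be positive for finite results):
 * \code
 * Vc::float_v x(2.f);
 * Vc::float_v a = Vc::log(x);   // elementwise ln
 * Vc::float_v b = Vc::log10(x); // elementwise log₁₀
 * Vc::float_v c = Vc::log2(x);  // elementwise log₂
 * \endcode
 */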

#endif // Vc_COMMON_MATH_H_INTERNAL
@ -0,0 +1,318 @@
/* This file is part of the Vc library. {{{
Copyright © 2010-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_COMMON_MACROS_H_
#define VC_COMMON_MACROS_H_

#include "../global.h"


#ifdef Vc_MSVC
#define Vc_ALIGNED_TYPEDEF(n_, type_, new_type_) \
    typedef __declspec(align(n_)) type_ new_type_
#elif __GNUC__
#define Vc_ALIGNED_TYPEDEF(n_, type_, new_type_) \
    typedef type_ new_type_[[gnu::aligned(n_)]]
#else // the following is actually ill-formed according to C++1[14]
#define Vc_ALIGNED_TYPEDEF(n_, type_, new_type_) \
    using new_type_ alignas(sizeof(n_)) = type_
#endif
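
/* Expansion sketch for Vc_ALIGNED_TYPEDEF (the name aligned_float is hypothetical):
 * \code
 * Vc_ALIGNED_TYPEDEF(16, float, aligned_float);
 * // GNU branch:  typedef float aligned_float [[gnu::aligned(16)]];
 * // MSVC branch: typedef __declspec(align(16)) float aligned_float;
 * \endcode
 */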

// On Windows (WIN32) we might see macros called min and max. Just undefine them and hope
// no one (re)defines them (NOMINMAX should help).
#ifdef WIN32
#define NOMINMAX 1
#if defined min
#undef min
#endif
#if defined max
#undef max
#endif
#endif // WIN32

#if defined Vc_GCC && Vc_GCC >= 0x60000
// GCC 6 drops all attributes on types passed as template arguments. This is important
// if a may_alias gets lost and therefore needs to be re-added in the implementation of
// the class template.
#define Vc_TEMPLATES_DROP_ATTRIBUTES 1
#endif

#if defined Vc_CLANG || defined Vc_APPLECLANG
#  define Vc_UNREACHABLE __builtin_unreachable
#  define Vc_NEVER_INLINE [[gnu::noinline]]
#  define Vc_INTRINSIC_L inline
#  define Vc_INTRINSIC_R __attribute__((always_inline))
#  define Vc_INTRINSIC Vc_INTRINSIC_L Vc_INTRINSIC_R
#  define Vc_FLATTEN
#  define Vc_CONST __attribute__((const))
#  define Vc_CONST_L
#  define Vc_CONST_R Vc_CONST
#  define Vc_PURE __attribute__((pure))
#  define Vc_PURE_L
#  define Vc_PURE_R Vc_PURE
#  define Vc_MAY_ALIAS __attribute__((may_alias))
#  define Vc_ALWAYS_INLINE_L inline
#  define Vc_ALWAYS_INLINE_R __attribute__((always_inline))
#  define Vc_ALWAYS_INLINE Vc_ALWAYS_INLINE_L Vc_ALWAYS_INLINE_R
#  define Vc_IS_UNLIKELY(x) __builtin_expect(x, 0)
#  define Vc_IS_LIKELY(x) __builtin_expect(x, 1)
#  define Vc_RESTRICT __restrict__
#  define Vc_DEPRECATED(msg)
#  define Vc_DEPRECATED_ALIAS(msg)
#  define Vc_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__))
#elif defined(__GNUC__)
#  define Vc_UNREACHABLE __builtin_unreachable
#  if defined Vc_GCC && !defined __OPTIMIZE__
#    define Vc_MAY_ALIAS
#  else
#    define Vc_MAY_ALIAS __attribute__((__may_alias__))
#  endif
#  define Vc_INTRINSIC_R __attribute__((__always_inline__, __artificial__))
#  define Vc_INTRINSIC_L inline
#  define Vc_INTRINSIC Vc_INTRINSIC_L Vc_INTRINSIC_R
#  define Vc_FLATTEN __attribute__((__flatten__))
#  define Vc_ALWAYS_INLINE_L inline
#  define Vc_ALWAYS_INLINE_R __attribute__((__always_inline__))
#  define Vc_ALWAYS_INLINE Vc_ALWAYS_INLINE_L Vc_ALWAYS_INLINE_R
#  ifdef Vc_ICC
     // ICC miscompiles if there are functions marked as pure or const
#    define Vc_PURE
#    define Vc_CONST
#    define Vc_NEVER_INLINE
#  else
#    define Vc_NEVER_INLINE [[gnu::noinline]]
#    define Vc_PURE __attribute__((__pure__))
#    define Vc_CONST __attribute__((__const__))
#  endif
#  define Vc_CONST_L
#  define Vc_CONST_R Vc_CONST
#  define Vc_PURE_L
#  define Vc_PURE_R Vc_PURE
#  define Vc_IS_UNLIKELY(x) __builtin_expect(x, 0)
#  define Vc_IS_LIKELY(x) __builtin_expect(x, 1)
#  define Vc_RESTRICT __restrict__
#  ifdef Vc_ICC
#    define Vc_DEPRECATED(msg)
#    define Vc_DEPRECATED_ALIAS(msg)
#  else
#    define Vc_DEPRECATED(msg) __attribute__((__deprecated__(msg)))
#    define Vc_DEPRECATED_ALIAS(msg) __attribute__((__deprecated__(msg)))
#  endif
#  define Vc_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__))
#else
#  define Vc_NEVER_INLINE
#  define Vc_FLATTEN
#  ifdef Vc_PURE
#    undef Vc_PURE
#  endif
#  define Vc_MAY_ALIAS
#  ifdef Vc_MSVC
#    define Vc_ALWAYS_INLINE inline __forceinline
#    define Vc_ALWAYS_INLINE_L Vc_ALWAYS_INLINE
#    define Vc_ALWAYS_INLINE_R
#    define Vc_CONST __declspec(noalias)
#    define Vc_CONST_L Vc_CONST
#    define Vc_CONST_R
#    define Vc_PURE /*Vc_CONST*/
#    define Vc_PURE_L Vc_PURE
#    define Vc_PURE_R
#    define Vc_INTRINSIC inline __forceinline
#    define Vc_INTRINSIC_L Vc_INTRINSIC
#    define Vc_INTRINSIC_R
namespace Vc_VERSIONED_NAMESPACE {
namespace detail
{
static Vc_INTRINSIC void unreachable() { __assume(0); }
} // namespace detail
}
#    define Vc_UNREACHABLE Vc::detail::unreachable
#  else
#    define Vc_ALWAYS_INLINE
#    define Vc_ALWAYS_INLINE_L
#    define Vc_ALWAYS_INLINE_R
#    define Vc_CONST
#    define Vc_CONST_L
#    define Vc_CONST_R
#    define Vc_PURE
#    define Vc_PURE_L
#    define Vc_PURE_R
#    define Vc_INTRINSIC
#    define Vc_INTRINSIC_L
#    define Vc_INTRINSIC_R
#    define Vc_UNREACHABLE std::abort
#  endif
#  define Vc_IS_UNLIKELY(x) x
#  define Vc_IS_LIKELY(x) x
#  define Vc_RESTRICT __restrict
#  define Vc_DEPRECATED(msg) __declspec(deprecated(msg))
#  define Vc_DEPRECATED_ALIAS(msg)
#  define Vc_WARN_UNUSED_RESULT
#endif

#ifdef Vc_CXX14
#undef Vc_DEPRECATED
#define Vc_DEPRECATED(msg_) [[deprecated(msg_)]]
#endif

#define Vc_NOTHING_EXPECTING_SEMICOLON static_assert(true, "")

#define Vc_FREE_STORE_OPERATORS_ALIGNED(align_)                                          \
    /**\name new/delete overloads for correct alignment */                               \
    /**@{*/                                                                              \
    /*!\brief Allocates correctly aligned memory */                                      \
    Vc_ALWAYS_INLINE void *operator new(size_t size)                                     \
    {                                                                                    \
        return Vc::Common::aligned_malloc<align_>(size);                                 \
    }                                                                                    \
    /*!\brief Returns \p p. */                                                           \
    Vc_ALWAYS_INLINE void *operator new(size_t, void *p) { return p; }                   \
    /*!\brief Allocates correctly aligned memory */                                      \
    Vc_ALWAYS_INLINE void *operator new[](size_t size)                                   \
    {                                                                                    \
        return Vc::Common::aligned_malloc<align_>(size);                                 \
    }                                                                                    \
    /*!\brief Returns \p p. */                                                           \
    Vc_ALWAYS_INLINE void *operator new[](size_t, void *p) { return p; }                 \
    /*!\brief Frees aligned memory. */                                                   \
    Vc_ALWAYS_INLINE void operator delete(void *ptr, size_t) { Vc::Common::free(ptr); }  \
    /*!\brief Does nothing. */                                                           \
    Vc_ALWAYS_INLINE void operator delete(void *, void *) {}                             \
    /*!\brief Frees aligned memory. */                                                   \
    Vc_ALWAYS_INLINE void operator delete[](void *ptr, size_t)                           \
    {                                                                                    \
        Vc::Common::free(ptr);                                                           \
    }                                                                                    \
    /*!\brief Does nothing. */                                                           \
    Vc_ALWAYS_INLINE void operator delete[](void *, void *) {}                           \
    /**@}*/                                                                              \
    Vc_NOTHING_EXPECTING_SEMICOLON
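
/* Usage sketch for Vc_FREE_STORE_OPERATORS_ALIGNED (hypothetical class, not part of
 * this header): placed in a class body, it makes dynamic allocations of that class
 * honor the requested alignment.
 * \code
 * class MyData
 * {
 * public:
 *     Vc_FREE_STORE_OPERATORS_ALIGNED(64); // new/delete use 64-Byte aligned memory
 * };
 * \endcode
 */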

#ifdef Vc_ASSERT
#define Vc_EXTERNAL_ASSERT 1
#else
#ifdef NDEBUG
#define Vc_ASSERT(x)
#else
#include <assert.h>
#define Vc_ASSERT(x) assert(x);
#endif
#endif

#if defined Vc_CLANG || defined Vc_APPLECLANG
#define Vc_HAS_BUILTIN(x) __has_builtin(x)
#else
#define Vc_HAS_BUILTIN(x) 0
#endif

#define Vc_CAT_HELPER_(a, b, c, d) a##b##c##d
#define Vc_CAT(a, b, c, d) Vc_CAT_HELPER_(a, b, c, d)

#define Vc_CAT_IMPL(a, b) a##b
#define Vc_CAT2(a, b) Vc_CAT_IMPL(a, b)

#define Vc_APPLY_IMPL_1_(macro, a, b, c, d, e) macro(a)
#define Vc_APPLY_IMPL_2_(macro, a, b, c, d, e) macro(a, b)
#define Vc_APPLY_IMPL_3_(macro, a, b, c, d, e) macro(a, b, c)
#define Vc_APPLY_IMPL_4_(macro, a, b, c, d, e) macro(a, b, c, d)
#define Vc_APPLY_IMPL_5_(macro, a, b, c, d, e) macro(a, b, c, d, e)

#define Vc_LIST_FLOAT_VECTOR_TYPES(size, macro, a, b, c, d) \
    size(macro, double_v, a, b, c, d)                       \
    size(macro, float_v, a, b, c, d)
#define Vc_LIST_INT_VECTOR_TYPES(size, macro, a, b, c, d) \
    size(macro, int_v, a, b, c, d)                        \
    size(macro, uint_v, a, b, c, d)                       \
    size(macro, short_v, a, b, c, d)                      \
    size(macro, ushort_v, a, b, c, d)
#define Vc_LIST_VECTOR_TYPES(size, macro, a, b, c, d)   \
    Vc_LIST_FLOAT_VECTOR_TYPES(size, macro, a, b, c, d) \
    Vc_LIST_INT_VECTOR_TYPES(size, macro, a, b, c, d)
#define Vc_LIST_COMPARES(size, macro, a, b, c, d) \
    size(macro, ==, a, b, c, d)                   \
    size(macro, !=, a, b, c, d)                   \
    size(macro, <=, a, b, c, d)                   \
    size(macro, >=, a, b, c, d)                   \
    size(macro, < , a, b, c, d)                   \
    size(macro, > , a, b, c, d)
#define Vc_LIST_LOGICAL(size, macro, a, b, c, d) \
    size(macro, &&, a, b, c, d)                  \
    size(macro, ||, a, b, c, d)
#define Vc_LIST_BINARY(size, macro, a, b, c, d) \
    size(macro, |, a, b, c, d)                  \
    size(macro, &, a, b, c, d)                  \
    size(macro, ^, a, b, c, d)
#define Vc_LIST_SHIFTS(size, macro, a, b, c, d) \
    size(macro, <<, a, b, c, d)                 \
    size(macro, >>, a, b, c, d)
#define Vc_LIST_ARITHMETICS(size, macro, a, b, c, d) \
    size(macro, +, a, b, c, d)                       \
    size(macro, -, a, b, c, d)                       \
    size(macro, *, a, b, c, d)                       \
    size(macro, /, a, b, c, d)                       \
    size(macro, %, a, b, c, d)

#define Vc_APPLY_0(_list, macro)             _list(Vc_APPLY_IMPL_1_, macro, 0, 0, 0, 0) Vc_NOTHING_EXPECTING_SEMICOLON
#define Vc_APPLY_1(_list, macro, a)          _list(Vc_APPLY_IMPL_2_, macro, a, 0, 0, 0) Vc_NOTHING_EXPECTING_SEMICOLON
#define Vc_APPLY_2(_list, macro, a, b)       _list(Vc_APPLY_IMPL_3_, macro, a, b, 0, 0) Vc_NOTHING_EXPECTING_SEMICOLON
#define Vc_APPLY_3(_list, macro, a, b, c)    _list(Vc_APPLY_IMPL_4_, macro, a, b, c, 0) Vc_NOTHING_EXPECTING_SEMICOLON
#define Vc_APPLY_4(_list, macro, a, b, c, d) _list(Vc_APPLY_IMPL_5_, macro, a, b, c, d) Vc_NOTHING_EXPECTING_SEMICOLON

#define Vc_ALL_COMPARES(macro)           Vc_APPLY_0(Vc_LIST_COMPARES, macro)
#define Vc_ALL_LOGICAL(macro)            Vc_APPLY_0(Vc_LIST_LOGICAL, macro)
#define Vc_ALL_BINARY(macro)             Vc_APPLY_0(Vc_LIST_BINARY, macro)
#define Vc_ALL_SHIFTS(macro)             Vc_APPLY_0(Vc_LIST_SHIFTS, macro)
#define Vc_ALL_ARITHMETICS(macro)        Vc_APPLY_0(Vc_LIST_ARITHMETICS, macro)
#define Vc_ALL_FLOAT_VECTOR_TYPES(macro) Vc_APPLY_0(Vc_LIST_FLOAT_VECTOR_TYPES, macro)
#define Vc_ALL_VECTOR_TYPES(macro)       Vc_APPLY_0(Vc_LIST_VECTOR_TYPES, macro)
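
/* Expansion sketch for the apply machinery above: Vc_ALL_ARITHMETICS(M) invokes the
 * given macro once per operator token (the macro name M is used for illustration
 * only).
 * \code
 * // Vc_ALL_ARITHMETICS(M) expands to:
 * // M(+) M(-) M(*) M(/) M(%) static_assert(true, "");
 * \endcode
 */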

#define Vc_EXACT_TYPE(_test, _reference, _type) \
    typename std::enable_if<std::is_same<_test, _reference>::value, _type>::type

#define Vc_make_unique(name) Vc_CAT(Vc_,name,_,__LINE__)

#if defined(Vc_NO_NOEXCEPT)
#define Vc_NOEXCEPT throw()
#else
#define Vc_NOEXCEPT noexcept
#endif

#ifdef Vc_NO_ALWAYS_INLINE
#undef Vc_ALWAYS_INLINE
#undef Vc_ALWAYS_INLINE_L
#undef Vc_ALWAYS_INLINE_R
#define Vc_ALWAYS_INLINE inline
#define Vc_ALWAYS_INLINE_L inline
#define Vc_ALWAYS_INLINE_R
#undef Vc_INTRINSIC
#undef Vc_INTRINSIC_L
#undef Vc_INTRINSIC_R
#define Vc_INTRINSIC inline
#define Vc_INTRINSIC_L inline
#define Vc_INTRINSIC_R
#endif

#endif // VC_COMMON_MACROS_H_

@ -0,0 +1,150 @@
/* This file is part of the Vc library. {{{
Copyright © 2013-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_COMMON_MAKECONTAINER_H_
#define VC_COMMON_MAKECONTAINER_H_

#include "../vector.h"
#include <initializer_list>

namespace Vc_VERSIONED_NAMESPACE
{

namespace
{
template<typename Container, typename T> struct make_container_helper
{
    static constexpr Container help(std::initializer_list<T> list) { return { list }; }
};

template <typename T_, typename Abi, typename Alloc,
          template <class, class> class Container>
struct make_container_helper<Container<Vector<T_, Abi>, Alloc>,
                             typename Vector<T_, Abi>::EntryType> {
    typedef Vector<T_, Abi> V;
    typedef typename V::EntryType T;
    typedef Container<V, Alloc> C;
    static inline C help(std::initializer_list<T> list) {
        const std::size_t size = (list.size() + (V::Size - 1)) / V::Size;
        C v(size);
        auto containerIt = v.begin();
        auto init = std::begin(list);
        const auto initEnd = std::end(list);
        for (std::size_t i = 0; i < size - 1; ++i) {
            *containerIt++ = V(init, Vc::Unaligned);
            init += V::Size;
        }
        Vc_ASSERT(all_of(*containerIt == V::Zero()));
        int j = 0;
        while (init != initEnd) {
            (*containerIt)[j++] = *init++;
        }
        return v;
    }
};

template <typename T_, typename Abi, std::size_t N,
          template <class, std::size_t> class Container>
struct make_container_helper<Container<Vector<T_, Abi>, N>,
                             typename Vector<T_, Abi>::EntryType> {
    typedef Vector<T_, Abi> V;
    typedef typename V::EntryType T;
    static constexpr std::size_t size = (N + (V::Size - 1)) / V::Size;
    typedef Container<
        V,
#if defined Vc_CLANG && Vc_CLANG < 0x30700 // TODO: when did Vc_APPLECLANG fix it?
        // clang before 3.7.0 has a bug when returning std::array<__m256x, 1>. So
        // increase it to std::array<__m256x, 2> and fill it with zeros. Better
        // than returning garbage.
        (size == 1 && std::is_same<Abi, VectorAbi::Avx>::value) ? 2 :
#endif
        size> C;
    static inline C help(std::initializer_list<T> list) {
        Vc_ASSERT(N == list.size());
        Vc_ASSERT(size == (list.size() + (V::Size - 1)) / V::Size);
        C v;
        auto containerIt = v.begin();
        auto init = std::begin(list);
        const auto initEnd = std::end(list);
        for (std::size_t i = 0; i < size - 1; ++i) {
            *containerIt++ = V(init, Vc::Unaligned);
            init += V::Size;
        }
        Vc_ASSERT(all_of(*containerIt == V::Zero()));
        int j = 0;
        while (init != initEnd) {
            (*containerIt)[j++] = *init++;
        }
        return v;
    }
};
} // anonymous namespace

/**
 * \ingroup Containers
 * \headerfile makeContainer.h <Vc/Utils>
 *
 * Construct a container of Vc vectors from a std::initializer_list of scalar entries.
 *
 * \tparam Container The container type to construct.
 * \tparam T The scalar type to use for the initializer_list.
 *
 * \param list An initializer list of arbitrary size. The type of the entries is important!
 * If you pass a list of integers you will get a container filled with Vc::int_v objects.
 * If, instead, you want to have a container of Vc::float_v objects, be sure to include a
 * period (.) and the 'f' postfix in the literals. Alternatively, you can pass the
 * type as second template argument to makeContainer.
 *
 * \return Returns a container of the requested class filled with the minimum number of SIMD
 * vectors to hold the values in the initializer list.
 * If the number of values in \p list does not match the number of values in the
 * returned container object, the remaining values in the returned object will be
 * zero-initialized.
 *
 * Example:
 * \code
 * auto data = Vc::makeContainer<std::vector<float_v>>({ 1.f, 2.f, 3.f, 4.f, 5.f });
 * // data.size() == 5 if float_v::Size == 1 (i.e. Vc_IMPL=Scalar)
 * // data.size() == 2 if float_v::Size == 4 (i.e. Vc_IMPL=SSE)
 * // data.size() == 1 if float_v::Size == 8 (i.e. Vc_IMPL=AVX)
 * \endcode
 */
template<typename Container, typename T>
constexpr auto makeContainer(std::initializer_list<T> list) -> decltype(make_container_helper<Container, T>::help(list))
{
    return make_container_helper<Container, T>::help(list);
}

template<typename Container, typename T>
constexpr auto make_container(std::initializer_list<T> list) -> decltype(makeContainer<Container, T>(list))
{
    return makeContainer<Container, T>(list);
}

} // namespace Vc

#endif // VC_COMMON_MAKECONTAINER_H_

@ -0,0 +1,56 @@
/* This file is part of the Vc library. {{{
Copyright © 2013-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_COMMON_MAKE_UNIQUE_H_
#define VC_COMMON_MAKE_UNIQUE_H_

#include <memory>
#include "malloc.h"

namespace Vc_VERSIONED_NAMESPACE
{
namespace Common
{

template<typename T> struct Deleter
{
    Vc_ALWAYS_INLINE void operator()(T *ptr) {
        ptr->~T();
        Vc::free(ptr);
    }
};

template<class T, MallocAlignment A = Vc::AlignOnVector, class... Args>
inline std::unique_ptr<T, Deleter<T>> make_unique(Args&&... args)
{
    return std::unique_ptr<T, Deleter<T>>(new(Vc::malloc<T, A>(1)) T(std::forward<Args>(args)...));
}
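
/* Usage sketch for Common::make_unique (internal API; assumes float_v from this Vc
 * build): the object is placement-constructed in Vc::malloc'ed storage and destroyed
 * and freed through Deleter.
 * \code
 * auto p = Vc::Common::make_unique<Vc::float_v>(Vc::float_v::Zero());
 * \endcode
 */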

} // namespace Common
} // namespace Vc

#endif // VC_COMMON_MAKE_UNIQUE_H_

@ -0,0 +1,169 @@
/* This file is part of the Vc library. {{{
Copyright © 2013-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_COMMON_MALLOC_H_
#define VC_COMMON_MALLOC_H_

#ifndef Vc_VECTOR_DECLARED_
#error "Incorrect inclusion order. This header must be included from Vc/vector.h only."
#endif

#if defined _WIN32 || defined _WIN64
#include <malloc.h>
#else
#include <cstdlib>
#endif

#include "macros.h"

namespace Vc_VERSIONED_NAMESPACE
{
namespace Common
{

template <size_t X> static constexpr size_t nextMultipleOf(size_t value)
{
    return (value % X) > 0 ? value + X - (value % X) : value;
}
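
/* Worked example for nextMultipleOf: values are rounded up to the next multiple of X,
 * while exact multiples are returned unchanged.
 * \code
 * static_assert(nextMultipleOf<16>(21) == 32, "");
 * static_assert(nextMultipleOf<16>(32) == 32, "");
 * \endcode
 */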
|
||||
|
||||
template <std::size_t alignment> Vc_INTRINSIC void *aligned_malloc(std::size_t n)
|
||||
{
|
||||
#ifdef __MIC__
|
||||
return _mm_malloc(nextMultipleOf<alignment>(n), alignment);
|
||||
#elif defined(_WIN32)
|
||||
# ifdef __GNUC__
|
||||
return __mingw_aligned_malloc(nextMultipleOf<alignment>(n), alignment);
|
||||
# else
|
||||
return _aligned_malloc(nextMultipleOf<alignment>(n), alignment);
|
||||
# endif
|
||||
#else
|
||||
void *ptr = nullptr;
|
||||
if (0 == posix_memalign(&ptr, alignment < sizeof(void *) ? sizeof(void *) : alignment,
|
||||
nextMultipleOf<alignment>(n))) {
|
||||
return ptr;
|
||||
}
|
||||
return ptr;
|
||||
#endif
|
||||
}

template <Vc::MallocAlignment A> Vc_ALWAYS_INLINE void *malloc(size_t n)
{
    switch (A) {
    case Vc::AlignOnVector:
        return aligned_malloc<Vc::VectorAlignment>(n);
    case Vc::AlignOnCacheline:
        // TODO: hardcoding 64 is not such a great idea
        return aligned_malloc<64>(n);
    case Vc::AlignOnPage:
        // TODO: hardcoding 4096 is not such a great idea
        return aligned_malloc<4096>(n);
    }
    return nullptr;
}

Vc_ALWAYS_INLINE void free(void *p)
{
#ifdef __MIC__
    _mm_free(p);
#elif defined(_WIN32)
# ifdef __GNUC__
    return __mingw_aligned_free(p);
# else
    return _aligned_free(p);
# endif
#else
    std::free(p);
#endif
}
} // namespace Common

/**
 * Allocates memory on the Heap with alignment and padding suitable for vectorized access.
 *
 * Memory that was allocated with this function must be released with Vc::free! Other methods might
 * work but are not portable.
 *
 * \param n Specifies the number of objects the allocated memory must be able to store.
 * \tparam T The type of the allocated memory. Note, that the constructor is not called.
 * \tparam A Determines the alignment of the memory. See \ref Vc::MallocAlignment.
 *
 * \return Pointer to memory of the requested type, or 0 on error. The allocated memory is padded at
 * the end to be a multiple of the requested alignment \p A. Thus if you request memory for 21
 * int objects, aligned via Vc::AlignOnCacheline, you can safely read a full cacheline until the
 * end of the array, without generating an out-of-bounds access. For a cacheline size of 64 Bytes
 * and an int size of 4 Bytes you would thus get an array of 128 Bytes to work with.
 *
 * \warning
 * \li The standard malloc function specifies the number of Bytes to allocate whereas this
 * function specifies the number of values, so the two differ by a factor of sizeof(T).
 * \li This function is mainly meant for use with builtin types. If you use a custom
 * type with a sizeof that is not a multiple of 2 the results might not be what you expect.
 * \li The constructor of T is not called. You can make up for this:
 * \code
 * SomeType *array = new(Vc::malloc<SomeType, Vc::AlignOnCacheline>(N)) SomeType[N];
 * \endcode
 *
 * \see Vc::free
 *
 * \ingroup Utilities
 * \headerfile memory.h <Vc/Memory>
 */
template<typename T, Vc::MallocAlignment A>
Vc_ALWAYS_INLINE T *malloc(size_t n)
{
    return static_cast<T *>(Common::malloc<A>(n * sizeof(T)));
}
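// A minimal usage sketch (an annotation; `data` and `N` are hypothetical names).
// It allocates, uses, and releases cacheline-aligned storage for N floats,
// relying on the padding described above for the final vector accesses:
//
//   const size_t N = 21;
//   float *data = Vc::malloc<float, Vc::AlignOnCacheline>(N);
//   for (size_t i = 0; i < N; ++i) {
//       data[i] = 1.f;  // scalar init; no constructor call needed for float
//   }
//   Vc::free(data);     // must be paired with Vc::malloc, not std::free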

/**
 * Frees memory that was allocated with Vc::malloc.
 *
 * \param p The pointer to the memory to be freed.
 *
 * \tparam T The type of the allocated memory.
 *
 * \warning The destructor of T is not called. If needed, you can call the destructor before calling
 * free:
 * \code
 * for (int i = 0; i < N; ++i) {
 *     p[i].~T();
 * }
 * Vc::free(p);
 * \endcode
 *
 * \ingroup Utilities
 * \headerfile memory.h <Vc/Memory>
 *
 * \see Vc::malloc
 */
template<typename T>
Vc_ALWAYS_INLINE void free(T *p)
{
    Common::free(p);
}
} // namespace Vc

#endif // VC_COMMON_MALLOC_H_
@ -0,0 +1,435 @@
/* This file is part of the Vc library. {{{
Copyright © 2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_COMMON_MASK_H_
#define VC_COMMON_MASK_H_

#include "macros.h"

namespace Vc_VERSIONED_NAMESPACE
{
/**
 * \class Mask mask.h <Vc/vector.h>
 * \ingroup Masks
 *
 * The main SIMD mask class.
 */
template <typename T, typename Abi = VectorAbi::Best<T>> class Mask
{
public:
    /**
     * Returns the number of boolean components (\VSize{T}) in a mask of this type.
     *
     * The size of the mask, i.e. the number of boolean entries in the mask. Do not
     * make any assumptions about the size of masks.
     *
     * In addition, you can easily use if clauses that compare sizes. The compiler can
     * statically evaluate and fully optimize dead code away (very much like \#ifdef, but
     * with syntax checking).
     *
     * \returns The number of components (i.e. \VSize{T}) objects of this mask type store
     * and manipulate.
     */
    static constexpr size_t size() { return VectorTraits<T, Abi>::size(); }
    ///\copydoc size
    ///\deprecated Use Vc::Mask::size instead.
    static constexpr size_t Size = VectorTraits<T, Abi>::size();

    /**
     * Specifies the alignment requirement for aligned load and store calls for objects of
     * this mask type.
     */
    static constexpr size_t MemoryAlignment = VectorTraits<T, Abi>::maskMemoryAlignment();

    /// The ABI tag type of the current template instantiation.
    using abi = Abi;

    /**
     * The \c EntryType of masks is always \c bool, independent of \c T.
     */
    using EntryType = bool;
    /// \copydoc EntryType
    using value_type = EntryType;

    /// The reference wrapper type used for accessing individual mask components.
    using EntryReference = typename VectorTraits<T, Abi>::EntryReference;
    /// \copydoc EntryReference
    using value_reference = EntryReference;

    /**
     * The \c VectorEntryType, in contrast to \c EntryType, reveals information about the SIMD
     * implementation.
     * This type is useful for the \c sizeof operator in generic functions.
     */
    using VectorEntryType = typename VectorTraits<T, Abi>::VectorEntryType;

    /**\internal
     * The \c VectorType reveals the implementation-specific internal type used for the SIMD type.
     */
    using VectorType = typename VectorTraits<T, Abi>::VectorType;
    /**\internal
     * \copydoc VectorType
     */
    using vector_type = VectorType;

    /*
     * The associated Vector<T> type.
     */
    //using Vector = Vector<T, Abi>;

    /// \name Generators
    ///@{
    /**
     * Creates a new mask object initialized to zero/\c false.
     *
     * \returns A mask object with zero-initialized components.
     */
    Vc_INTRINSIC static Mask Zero();

    /**
     * Creates a mask object initialized to one/\c true.
     *
     * \returns A mask object with components initialized to \c true.
     */
    Vc_INTRINSIC static Mask One();

    /// Generate a mask object from booleans returned from the function \p gen.
    template <typename G> static Vc_INTRINSIC Mask generate(G &&gen);
    ///@}
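    // A sketch of the generators (an annotation, assuming `gen` is invoked with
    // the component index, and using float_m as the float mask type):
    //
    //   float_m none = float_m::Zero();  // all components false
    //   float_m all  = float_m::One();   // all components true
    //   float_m even = float_m::generate([](size_t i) { return i % 2 == 0; });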

    /// \name Compile-Time Constant Initialization
    ///@{
    /**
     * Construct a zero-initialized mask object.
     *
     * This constructor follows the behavior of the underlying \c bool type in that the
     * expression `bool()` zero-initializes the object (to \c false). On the other hand
     * the variable \c x in `bool x;` is uninitialized.
     * Since, for class types, both expressions call the default constructor, `Mask<T> x`
     * must zero-initialize \c x as well.
     */
    Vc_INTRINSIC Mask() = default;

    /// Zero-initialize the new mask object (\c false).
    /// \see Vc::Zero, Zero()
    Vc_INTRINSIC explicit Mask(VectorSpecialInitializerZero);

    /// Initialize the new mask object to one (\c true).
    /// \see Vc::One, One()
    Vc_INTRINSIC explicit Mask(VectorSpecialInitializerOne);
    ///@}

    /// \name Conversion/Broadcast Constructors
    ///@{
    /**
     * Broadcast constructor.
     *
     * Set all components of the new mask object to \p b.
     *
     * \param b Determines the initial state of the mask.
     */
    Vc_INTRINSIC explicit Mask(bool b);

    /**
     * Implicit conversion from a compatible (equal \VSize{T} on every platform) mask
     * object.
     *
     * \param otherMask The mask to be converted.
     */
    template <typename U>
    Vc_INTRINSIC Mask(U &&otherMask,
                      Common::enable_if_mask_converts_implicitly<Mask, T, U> = nullarg);

#if Vc_IS_VERSION_1
    /**
     * Explicit conversion (static_cast) from a mask object that potentially has a
     * different \VSize{T}.
     *
     * \param otherMask The mask to be converted.
     *
     * \internal This is implemented via simd_cast in scalar/simd_cast_caller.h
     */
    template <typename U>
    Vc_DEPRECATED(
        "use simd_cast instead of explicit type casting to convert between mask types")
    Vc_INTRINSIC_L
        explicit Mask(U &&otherMask, Common::enable_if_mask_converts_explicitly<T, U> =
                                         nullarg) Vc_INTRINSIC_R;
    ///@}
#endif

    /**
     * \name Loads & Stores
     */
    ///@{
    /**
     * Load constructor from an array of \c bool.
     *
     * This constructor implements an explicit conversion from an array of booleans to a
     * mask object. It corresponds to a Vector load constructor.
     *
     * \param mem A pointer to the start of the array of booleans.
     * \see Mask(const bool *, Flags), load(const bool *)
     */
    Vc_ALWAYS_INLINE explicit Mask(const bool *mem);
    /**
     * Overload of the above with a load/store flag argument.
     *
     * \param mem A pointer to the start of the array of booleans.
     * \param flags Choose a combination of flags such as Vc::Aligned, Vc::Streaming,
     * Vc::Unaligned, Vc::PrefetchDefault, ...
     * \see load(const bool *, Flags)
     */
    template <typename Flags> Vc_ALWAYS_INLINE explicit Mask(const bool *mem, Flags flags);

    /**
     * Load the components of the mask from an array of \c bool.
     *
     * \param mem A pointer to the start of the array of booleans.
     * \see load(const bool *, Flags), Mask(const bool *)
     */
    Vc_ALWAYS_INLINE void load(const bool *mem);
    /**
     * Overload of the above with a load/store flag argument.
     *
     * \param mem A pointer to the start of the array of booleans.
     * \param flags Choose a combination of flags such as Vc::Aligned, Vc::Streaming,
     * Vc::Unaligned, Vc::PrefetchDefault, ...
     * \see Mask(const bool *, Flags)
     */
    template <typename Flags> Vc_ALWAYS_INLINE void load(const bool *mem, Flags flags);

    /**
     * Store the values of the mask to an array of \c bool.
     *
     * \param mem A pointer to the start of the array of booleans.
     * \see store(bool *, Flags)
     */
    Vc_ALWAYS_INLINE void store(bool *mem) const;
    /**
     * Overload of the above with a load/store flag argument.
     *
     * \param mem A pointer to the start of the array of booleans.
     * \param flags Choose a combination of flags such as Vc::Aligned, Vc::Streaming,
     * Vc::Unaligned, Vc::PrefetchDefault, ...
     */
    template <typename Flags> Vc_ALWAYS_INLINE void store(bool *mem, Flags flags) const;
    ///@}
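    // A sketch of the load/store round trip (an annotation; float_m is the float
    // mask type, and at least size() booleans must be accessible through raw):
    //
    //   bool raw[float_m::size()];
    //   float_m m(true);    // broadcast constructor: all components true
    //   m.store(raw);       // raw[i] == true for every component i
    //   float_m back(raw);  // load constructor reads the booleans back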

    /// \name Comparison Operators
    ///@{
    /**
     * Returns whether the two masks are equal in all components.
     *
     * \param mask The other mask to compare against.
     * \returns A scalar boolean value that says whether all components of the two masks
     * are equal.
     *
     * \note If you expected a behavior similar to the compare operator of Vc::Vector,
     * consider that the bitwise operators already implement such functionality. There is
     * little use, typically, in having `a == b` return the same as `a ^ b`. In general,
     * it is more useful to query `none_of(a ^ b)`, which is equivalent to this equality
     * operator.
     */
    Vc_ALWAYS_INLINE bool operator==(const Mask &mask) const;

    /**
     * Returns whether the two masks are different in at least one component.
     *
     * \param mask The other mask to compare against.
     * \returns A scalar boolean value that says whether at least one component of the
     * two masks is different.
     *
     * \note `(a == b) == !(a != b)` holds
     * \see Mask::operator==(const Mask &)
     */
    Vc_ALWAYS_INLINE bool operator!=(const Mask &mask) const;
    ///@}

    /**
     * \name Logical and Binary Operators
     *
     * \brief Component-wise logical/binary operations on mask objects.
     *
     * The effect of logical and binary \c AND and \c OR is equivalent for mask types (as
     * it is for \c bool).
     */
    ///@{

    /// Returns the component-wise application of a logical \c AND to \p mask.
    Vc_ALWAYS_INLINE Mask operator&&(const Mask &mask) const;
    /// Returns the component-wise application of a binary \c AND to \p mask.
    Vc_ALWAYS_INLINE Mask operator&(const Mask &mask) const;
    /// Returns the component-wise application of a logical \c OR to \p mask.
    Vc_ALWAYS_INLINE Mask operator||(const Mask &mask) const;
    /// Returns the component-wise application of a binary \c OR to \p mask.
    Vc_ALWAYS_INLINE Mask operator|(const Mask &mask) const;
    /// Returns the component-wise application of a binary \c XOR to \p mask.
    Vc_ALWAYS_INLINE Mask operator^(const Mask &mask) const;
    /// Returns a mask with inverted components.
    Vc_ALWAYS_INLINE Mask operator!() const;

    /// Modifies the mask using an \c AND operation with \p mask.
    Vc_ALWAYS_INLINE Mask &operator&=(const Mask &mask);
    /// Modifies the mask using an \c OR operation with \p mask.
    Vc_ALWAYS_INLINE Mask &operator|=(const Mask &mask);
    /// Modifies the mask using an \c XOR operation with \p mask.
    Vc_ALWAYS_INLINE Mask &operator^=(const Mask &mask);
    ///@}

    /**
     * \name Reductions
     *
     * \see any_of, all_of, none_of, some_of
     */
    ///@{

    /// Returns a logical \c AND of all components.
    Vc_ALWAYS_INLINE bool isFull() const;
    /// Returns a logical \c OR of all components.
    Vc_ALWAYS_INLINE bool isNotEmpty() const;
    /// Returns \c true if all components are \c false, \c false otherwise.
    Vc_ALWAYS_INLINE bool isEmpty() const;
    /// Returns `!isFull() && !isEmpty()`.
    Vc_ALWAYS_INLINE bool isMix() const;
    ///@}

    /**\internal
     * \name Internal Data Access
     */
    ///@{
    Vc_ALWAYS_INLINE bool data() const;
    Vc_ALWAYS_INLINE bool dataI() const;
    Vc_ALWAYS_INLINE bool dataD() const;
    ///@}

    /// \name Scalar Subscript Operators
    ///@{
    /**
     * Lvalue-reference-like access to mask entries.
     *
     * \param index Determines the boolean to be accessed.
     * \return a temporary proxy object referencing the \p index th entry of the mask.
     *
     * \warning This operator does not return an lvalue reference (to \c bool), but rather
     * a temporary (rvalue) object that mimics an lvalue reference (as much as is possible
     * with C++11/14).
     */
    Vc_ALWAYS_INLINE EntryReference operator[](size_t index);

    /**
     * Read-only access to mask entries.
     *
     * \param index Determines the boolean to be accessed.
     * \return The \p index th entry of the mask as a \c bool (rvalue).
     *
     * \warning This operator does not return an lvalue reference (to `const bool`), but
     * rather a temporary (rvalue) \c bool.
     */
    Vc_ALWAYS_INLINE EntryType operator[](size_t index) const;
    ///@}

    /// Returns how many components of the mask are \c true.
    Vc_ALWAYS_INLINE int count() const;

    /**
     * Returns the index of the first one in the mask.
     *
     * \returns the index of the first component that is \c true.
     *
     * \warning The return value is undefined if the mask is empty.
     *
     * Thus, unless `none_of(mask)`, `mask[mask.firstOne()] == true` holds and `mask[i] ==
     * false` for all `i < mask.firstOne()`.
     */
    Vc_ALWAYS_INLINE int firstOne() const;

    /**
     * Convert the boolean components of the mask into bits of an integer.
     *
     * \return An \c int where each bit corresponds to the boolean value in the mask.
     *
     * For example, the mask `[true, false, false, true]` results in a `9` (in binary: `1001`).
     */
    Vc_ALWAYS_INLINE int toInt() const;

    /// Returns a mask with components shifted by \p amount places.
    Vc_INTRINSIC Vc_PURE Mask shifted(int amount) const;

    Vc_FREE_STORE_OPERATORS_ALIGNED(alignof(Mask));

private:
    VectorType d;
};

/**
 * \ingroup Utilities
 *
 * \name Boolean Reductions
 */
//@{
/** \ingroup Utilities
 * Returns whether all entries in the mask \p m are \c true.
 */
template<typename Mask> constexpr bool all_of(const Mask &m) { return m.isFull(); }
/** \ingroup Utilities
 * Returns \p b
 */
constexpr bool all_of(bool b) { return b; }

/** \ingroup Utilities
 * Returns whether at least one entry in the mask \p m is \c true.
 */
template<typename Mask> constexpr bool any_of(const Mask &m) { return m.isNotEmpty(); }
/** \ingroup Utilities
 * Returns \p b
 */
constexpr bool any_of(bool b) { return b; }

/** \ingroup Utilities
 * Returns whether all entries in the mask \p m are \c false.
 */
template<typename Mask> constexpr bool none_of(const Mask &m) { return m.isEmpty(); }
/** \ingroup Utilities
 * Returns \c !b
 */
constexpr bool none_of(bool b) { return !b; }

/** \ingroup Utilities
 * Returns whether at least one entry in \p m is \c true and at least one entry in \p m is \c
 * false.
 */
template<typename Mask> constexpr bool some_of(const Mask &m) { return m.isMix(); }
/** \ingroup Utilities
 * Returns \c false
 */
constexpr bool some_of(bool) { return false; }
//@}
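// A short sketch of how these reductions are typically used to branch on SIMD
// comparison results (an annotation; float_v is the float vector type):
//
//   float_v x = ...;
//   auto m = x > 0.f;             // component-wise comparison yields a mask
//   if (Vc::all_of(m))  { /* every component is positive */ }
//   if (Vc::none_of(m)) { /* no component is positive */ }
//   if (Vc::some_of(m)) { /* mixed: some positive, some not */ }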
} // namespace Vc

#endif // VC_COMMON_MASK_H_

// vim: foldmethod=marker
@ -0,0 +1,98 @@
/* This file is part of the Vc library. {{{
Copyright © 2013-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_COMMON_MASKENTRY_H_
#define VC_COMMON_MASKENTRY_H_

#include "macros.h"

namespace Vc_VERSIONED_NAMESPACE
{
namespace Common
{

namespace
{
    template<size_t Bytes> struct MaskBoolStorage;
    // the following four typedefs must use std::intN_t and NOT! Vc::intN_t. The latter
    // segfaults ICC 15.0.3.
    template<> struct MaskBoolStorage<1> { typedef std::int8_t type; };
    template<> struct MaskBoolStorage<2> { typedef std::int16_t type; };
    template<> struct MaskBoolStorage<4> { typedef std::int32_t type; };
    template<> struct MaskBoolStorage<8> { typedef std::int64_t type; };
} // anonymous namespace

template<size_t Bytes> class MaskBool
{
    typedef typename MaskBoolStorage<Bytes>::type storage_type Vc_MAY_ALIAS;
    storage_type data;

public:
    constexpr MaskBool(bool x) noexcept : data(x ? -1 : 0) {}
    Vc_ALWAYS_INLINE MaskBool &operator=(bool x) noexcept { data = x ? -1 : 0; return *this; }
    template <typename T, typename = enable_if<(!std::is_same<T, bool>::value &&
                                                std::is_fundamental<T>::value)>>
    Vc_ALWAYS_INLINE MaskBool &operator=(T x) noexcept
    {
        data = reinterpret_cast<const storage_type &>(x);
        return *this;
    }

    Vc_ALWAYS_INLINE MaskBool(const MaskBool &) noexcept = default;
    Vc_ALWAYS_INLINE MaskBool &operator=(const MaskBool &) noexcept = default;

    template <typename T, typename = enable_if<(std::is_same<T, bool>::value ||
                                                (std::is_fundamental<T>::value &&
                                                 sizeof(storage_type) == sizeof(T)))>>
    constexpr operator T() const noexcept
    {
        return std::is_same<T, bool>::value ? T((data & 1) != 0) : aliasing_cast<T>(data);
    }
} Vc_MAY_ALIAS;
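// An illustration of the all-ones/all-zeros encoding used above (an annotation,
// not part of the header): a true MaskBool stores -1, i.e. every bit set, which
// matches the per-component representation SIMD compare instructions produce.
//
//   MaskBool<4> t(true);   // data == int32_t(-1), bit pattern 0xFFFFFFFF
//   MaskBool<4> f(false);  // data == 0
//   bool b = t;            // conversion tests the low bit: (data & 1) != 0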

template <typename A,
          typename B,
          typename std::enable_if<
              std::is_convertible<A, bool>::value && std::is_convertible<B, bool>::value,
              int>::type = 0>
constexpr bool operator==(A &&a, B &&b)
{
    return static_cast<bool>(a) == static_cast<bool>(b);
}
template <typename A,
          typename B,
          typename std::enable_if<
              std::is_convertible<A, bool>::value && std::is_convertible<B, bool>::value,
              int>::type = 0>
constexpr bool operator!=(A &&a, B &&b)
{
    return static_cast<bool>(a) != static_cast<bool>(b);
}

} // namespace Common
} // namespace Vc

#endif // VC_COMMON_MASKENTRY_H_
@ -0,0 +1,142 @@
/* This file is part of the Vc library. {{{
Copyright © 2013-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_COMMON_MATH_H_
#define VC_COMMON_MATH_H_

#define Vc_COMMON_MATH_H_INTERNAL 1

#include "trigonometric.h"

#include "const.h"
#include "macros.h"

namespace Vc_VERSIONED_NAMESPACE
{
// TODO, not vectorized:
template <class T, class Abi>
SimdArray<int, Vector<T, Abi>::size()> fpclassify(const Vector<T, Abi> &x)
{
    return SimdArray<int, Vector<T, Abi>::size()>(
        [&](std::size_t i) { return std::fpclassify(x[i]); });
}
template <class T, size_t N> SimdArray<int, N> fpclassify(const SimdArray<T, N> &x)
{
    return SimdArray<int, N>([&](std::size_t i) { return std::fpclassify(x[i]); });
}
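// Usage sketch (an annotation): each component is classified independently,
// yielding the standard <cmath> category constants per lane.
//
//   Vc::float_v v = ...;
//   auto c = fpclassify(v);  // c[i] is one of FP_NORMAL, FP_ZERO, FP_NAN, ...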

#ifdef Vc_IMPL_SSE
// for SSE, AVX, and AVX2
#include "logarithm.h"
#include "exponential.h"

#ifdef Vc_IMPL_AVX
inline AVX::double_v exp(AVX::double_v _x)
{
    AVX::Vector<double> x = _x;
    typedef AVX::Vector<double> V;
    typedef V::Mask M;
    typedef AVX::Const<double> C;

    const M overflow  = x > Vc::Detail::doubleConstant< 1, 0x0006232bdd7abcd2ull, 9>(); // max log
    const M underflow = x < Vc::Detail::doubleConstant<-1, 0x0006232bdd7abcd2ull, 9>(); // min log

    V px = floor(C::log2_e() * x + 0.5);
    __m128i tmp = _mm256_cvttpd_epi32(px.data());
    const SimdArray<int, V::Size> n = SSE::int_v{tmp};
    x -= px * C::ln2_large(); //Vc::Detail::doubleConstant<1, 0x00062e4000000000ull, -1>(); // ln2
    x -= px * C::ln2_small(); //Vc::Detail::doubleConstant<1, 0x0007f7d1cf79abcaull, -20>(); // ln2

    const double P[] = {
        Vc::Detail::doubleConstant<1, 0x000089cdd5e44be8ull, -13>(),
        Vc::Detail::doubleConstant<1, 0x000f06d10cca2c7eull, -6>(),
        Vc::Detail::doubleConstant<1, 0x0000000000000000ull, 0>()
    };
    const double Q[] = {
        Vc::Detail::doubleConstant<1, 0x00092eb6bc365fa0ull, -19>(),
        Vc::Detail::doubleConstant<1, 0x0004ae39b508b6c0ull, -9>(),
        Vc::Detail::doubleConstant<1, 0x000d17099887e074ull, -3>(),
        Vc::Detail::doubleConstant<1, 0x0000000000000000ull, 1>()
    };
    const V x2 = x * x;
    px = x * ((P[0] * x2 + P[1]) * x2 + P[2]);
    x = px / ((((Q[0] * x2 + Q[1]) * x2 + Q[2]) * x2 + Q[3]) - px);
    x = V::One() + 2.0 * x;

    x = ldexp(x, n); // == x * 2ⁿ

    x(overflow) = std::numeric_limits<double>::infinity();
    x.setZero(underflow);

    return x;
}
#endif // Vc_IMPL_AVX

inline SSE::double_v exp(SSE::double_v::AsArg _x) {
    SSE::Vector<double> x = _x;
    typedef SSE::Vector<double> V;
    typedef V::Mask M;
    typedef SSE::Const<double> C;

    const M overflow  = x > Vc::Detail::doubleConstant< 1, 0x0006232bdd7abcd2ull, 9>(); // max log
    const M underflow = x < Vc::Detail::doubleConstant<-1, 0x0006232bdd7abcd2ull, 9>(); // min log

    V px = floor(C::log2_e() * x + 0.5);
    SimdArray<int, V::Size> n;
    _mm_storel_epi64(reinterpret_cast<__m128i *>(&n), _mm_cvttpd_epi32(px.data()));
    x -= px * C::ln2_large(); //Vc::Detail::doubleConstant<1, 0x00062e4000000000ull, -1>(); // ln2
    x -= px * C::ln2_small(); //Vc::Detail::doubleConstant<1, 0x0007f7d1cf79abcaull, -20>(); // ln2

    const double P[] = {
        Vc::Detail::doubleConstant<1, 0x000089cdd5e44be8ull, -13>(),
        Vc::Detail::doubleConstant<1, 0x000f06d10cca2c7eull, -6>(),
        Vc::Detail::doubleConstant<1, 0x0000000000000000ull, 0>()
    };
    const double Q[] = {
        Vc::Detail::doubleConstant<1, 0x00092eb6bc365fa0ull, -19>(),
        Vc::Detail::doubleConstant<1, 0x0004ae39b508b6c0ull, -9>(),
        Vc::Detail::doubleConstant<1, 0x000d17099887e074ull, -3>(),
        Vc::Detail::doubleConstant<1, 0x0000000000000000ull, 1>()
    };
    const V x2 = x * x;
    px = x * ((P[0] * x2 + P[1]) * x2 + P[2]);
    x = px / ((((Q[0] * x2 + Q[1]) * x2 + Q[2]) * x2 + Q[3]) - px);
    x = V::One() + 2.0 * x;

    x = ldexp(x, n); // == x * 2ⁿ

    x(overflow) = std::numeric_limits<double>::infinity();
    x.setZero(underflow);

    return x;
}

#endif
} // namespace Vc

#undef Vc_COMMON_MATH_H_INTERNAL

#endif // VC_COMMON_MATH_H_
@ -0,0 +1,591 @@
/* This file is part of the Vc library. {{{
Copyright © 2009-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_COMMON_MEMORY_H_
#define VC_COMMON_MEMORY_H_

#include "memorybase.h"
#include <assert.h>
#include <algorithm>
#include <cstring>
#include <cstddef>
#include <initializer_list>
#include "memoryfwd.h"
#include "malloc.h"
#include "macros.h"

namespace Vc_VERSIONED_NAMESPACE
{
namespace Common
{
template<typename V, size_t Size> struct _MemorySizeCalculation
{
    enum AlignmentCalculations {
        Alignment = V::Size,
        AlignmentMask = Alignment - 1,
        MaskedSize = Size & AlignmentMask,
        Padding = Alignment - MaskedSize,
        PaddedSize = MaskedSize == 0 ? Size : Size + Padding
    };
};

/**
 * \ingroup Containers
 * \headerfile memory.h <Vc/Memory>
 *
 * A helper class for fixed-size two-dimensional arrays.
 *
 * \param V The vector type you want to operate on. (e.g. float_v or uint_v)
 * \param Size1 Number of rows
 * \param Size2 Number of columns
 */
template <typename V, size_t Size1, size_t Size2, bool InitPadding>
class Memory : public MemoryBase<V, Memory<V, Size1, Size2, InitPadding>, 2,
                                 Memory<V, Size2, 0, InitPadding>>
{
public:
    typedef typename V::EntryType EntryType;

private:
    using RowMemory = Memory<V, Size2, 0, InitPadding>;
    typedef MemoryBase<V, Memory<V, Size1, Size2, InitPadding>, 2, RowMemory> Base;
    friend class MemoryBase<V, Memory<V, Size1, Size2, InitPadding>, 2, RowMemory>;
    friend class MemoryDimensionBase<V, Memory<V, Size1, Size2, InitPadding>, 2,
                                     RowMemory>;
    enum : size_t {
        Alignment = V::MemoryAlignment,
        PaddedSize2 = _MemorySizeCalculation<V, Size2>::PaddedSize
    };
    alignas(static_cast<size_t>(Alignment))  // GCC complains about 'is not an
                                             // integer constant' unless the
                                             // static_cast is present
        RowMemory m_mem[Size1];

public:
    using Base::vector;
    enum Constants {
        RowCount = Size1,
        VectorsCount = PaddedSize2 / V::Size
    };

    Memory() = default;

    /**
     * \return the number of rows in the array.
     *
     * \note This function can be eliminated by an optimizing compiler.
     */
    static constexpr size_t rowsCount() { return RowCount; }
    /**
     * \return the number of scalar entries in the whole array.
     *
     * \warning Do not use this function for scalar iteration over the array since there will be
     * padding between rows if \c Size2 is not divisible by \c V::Size.
     *
     * \note This function can be optimized into a compile-time constant.
     */
    static constexpr size_t entriesCount() { return Size1 * Size2; }
    /**
     * \return the number of vectors in the whole array.
     *
     * \note This function can be optimized into a compile-time constant.
     */
    static constexpr size_t vectorsCount() { return VectorsCount * Size1; }

    /**
     * Copies the data from a different object.
     *
     * \param rhs The object to copy the data from.
     *
     * \return reference to the modified Memory object.
     *
     * \note Both objects must have the exact same vectorsCount().
     */
    template<typename Parent, typename RM>
    Vc_ALWAYS_INLINE Memory &operator=(const MemoryBase<V, Parent, 2, RM> &rhs) {
        assert(vectorsCount() == rhs.vectorsCount());
        Detail::copyVectors(*this, rhs);
        return *this;
    }

    Vc_ALWAYS_INLINE Memory &operator=(const Memory &rhs) {
        Detail::copyVectors(*this, rhs);
        return *this;
    }

    /**
     * Initialize all data with the given vector.
     *
     * \param v This vector will be used to initialize the memory.
     *
     * \return reference to the modified Memory object.
     */
    inline Memory &operator=(const V &v) {
        for (size_t i = 0; i < vectorsCount(); ++i) {
            vector(i) = v;
        }
        return *this;
    }
};
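// A hedged usage sketch for the two-dimensional case (an annotation; the
// row-subscript and vector accessors are assumed to come from MemoryBase,
// defined in memorybase.h):
//
//   Vc::Memory<float_v, 4, 11> m;      // 4 rows, 11 columns, padded per row
//   for (size_t r = 0; r < m.rowsCount(); ++r) {
//       for (size_t v = 0; v < m[r].vectorsCount(); ++v) {
//           m[r].vector(v) = float_v::Zero();  // vectorized access, row by row
//       }
//   }
//   m[1][3] = 1.f;                     // scalar access: row 1, column 3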

/**
 * A helper class to simplify usage of correctly aligned and padded memory, allowing both vector and
 * scalar access.
 *
 * Example:
 * \code
 Vc::Memory<int_v, 11> array;

 // scalar access:
 for (size_t i = 0; i < array.entriesCount(); ++i) {
     int x = array[i]; // read
     array[i] = x;     // write
 }
 // more explicit alternative:
 for (size_t i = 0; i < array.entriesCount(); ++i) {
     int x = array.scalar(i); // read
     array.scalar(i) = x;     // write
 }

 // vector access:
 for (size_t i = 0; i < array.vectorsCount(); ++i) {
     int_v x = array.vector(i); // read
     array.vector(i) = x;       // write
 }
 * \endcode
 * This code allocates a small array and implements three equivalent loops (that do nothing useful).
 * The loops show how scalar and vector read/write access is best implemented.
 *
 * Since the size of 11 is not a multiple of int_v::Size (unless you use the
 * scalar Vc implementation) the last write access of the vector loop would normally be out of
 * bounds. But the Memory class automatically pads the memory such that the whole array can be
 * accessed with correctly aligned memory addresses.
 *
 * \param V The vector type you want to operate on. (e.g. float_v or uint_v)
 * \param Size The number of entries of the scalar base type the memory should hold. This
 * is thus the same number as you would use for a normal C array (e.g. float mem[11] becomes
 * Memory<float_v, 11> mem).
 *
 * \see Memory<V, 0u>
 *
 * \ingroup Containers
 * \headerfile memory.h <Vc/Memory>
 */
template <typename V, size_t Size, bool InitPadding>
class Memory<V, Size, 0u, InitPadding> :
    public MemoryBase<V, Memory<V, Size, 0u, InitPadding>, 1, void>
{
public:
    typedef typename V::EntryType EntryType;

private:
    typedef MemoryBase<V, Memory<V, Size, 0u, InitPadding>, 1, void> Base;
    friend class MemoryBase<V, Memory<V, Size, 0u, InitPadding>, 1, void>;
    friend class MemoryDimensionBase<V, Memory<V, Size, 0u, InitPadding>, 1, void>;
    enum : size_t {
        Alignment = V::MemoryAlignment, // in Bytes
        MaskedSize = Size & (V::Size - 1), // the fraction of Size that exceeds
                                           // an integral multiple of V::Size
        Padding = V::Size - MaskedSize,
        PaddedSize = MaskedSize == 0 ? Size : Size + Padding
    };
    alignas(static_cast<size_t>(Alignment))  // GCC complains about 'is not an
                                             // integer constant' unless the
                                             // static_cast is present
        EntryType m_mem[PaddedSize];

public:
    using Base::vector;
    enum Constants {
        EntriesCount = Size,
        VectorsCount = PaddedSize / V::Size
    };

    Memory()
    {
        if (InitPadding) {
            Base::lastVector() = V::Zero();
        }
    }

    Memory(std::initializer_list<EntryType> init)
    {
        Vc_ASSERT(init.size() <= Size);
        Base::lastVector() = V::Zero();
        std::copy(init.begin(), init.end(), &m_mem[0]);
    }

    /**
     * Wrap existing data with the Memory convenience class.
     *
     * This function returns a \em reference to a Memory<V, Size, 0> object that you must
     * capture to avoid a copy of the whole data:
     * \code
     * Memory<float_v, 16> &m = Memory<float_v, 16>::fromRawData(someAlignedPointerToFloat);
     * \endcode
     *
     * \param ptr An aligned pointer to memory of type \p V::EntryType (e.g. \c float for
     * Vc::float_v).
     * \return A Memory object placed at the given location in memory.
     *
     * \warning The pointer \p ptr passed to this function must be aligned according to the
     * alignment restrictions of \p V.
     * \warning The size of the accessible memory must match \p Size. This includes the
     * required padding at the end to allow the last entries to be accessed via vectors. If
     * you know what you are doing you might violate this constraint.
     * \warning It is your responsibility to ensure that the memory is released correctly
     * (not too early/not leaked). This function simply adds convenience functions to \em
     * access the memory.
     */
    static Vc_ALWAYS_INLINE Vc_CONST Memory<V, Size, 0u, false> &fromRawData(EntryType *ptr)
    {
        // DANGER! This placement new has to use the right address. If the compiler decides
        // RowMemory requires padding before the actual data then the address has to be adjusted
        // accordingly
        char *addr = reinterpret_cast<char *>(ptr);
        typedef Memory<V, Size, 0u, false> MM;
        addr -= offsetof(MM, m_mem);
        return *new(addr) MM;
    }

    /**
     * \return the number of scalar entries in the whole array.
     *
     * \note This function can be optimized into a compile-time constant.
     */
    static constexpr size_t entriesCount() { return EntriesCount; }

    /**
     * \return the number of vectors in the whole array.
     *
     * \note This function can be optimized into a compile-time constant.
     */
    static constexpr size_t vectorsCount() { return VectorsCount; }

    inline Memory(const Memory &rhs)
    {
        Detail::copyVectors(*this, rhs);
    }

    template <size_t S> inline Memory(const Memory<V, S> &rhs)
    {
        assert(vectorsCount() == rhs.vectorsCount());
        Detail::copyVectors(*this, rhs);
    }

    inline Memory &operator=(const Memory &rhs)
    {
        Detail::copyVectors(*this, rhs);
        return *this;
    }

    template <size_t S> inline Memory &operator=(const Memory<V, S> &rhs)
    {
        assert(vectorsCount() == rhs.vectorsCount());
        Detail::copyVectors(*this, rhs);
        return *this;
    }

    Vc_ALWAYS_INLINE Memory &operator=(const EntryType *rhs) {
        std::memcpy(m_mem, rhs, entriesCount() * sizeof(EntryType));
        return *this;
    }
    inline Memory &operator=(const V &v) {
        for (size_t i = 0; i < vectorsCount(); ++i) {
            vector(i) = v;
        }
        return *this;
    }
};

/**
 * A helper class that is very similar to Memory<V, Size> but with dynamically allocated memory and
 * thus dynamic size.
 *
 * Example:
 * \code
 size_t size = 11;
 Vc::Memory<int_v> array(size);

 // scalar access:
 for (size_t i = 0; i < array.entriesCount(); ++i) {
     array[i] = i;
 }

 // vector access:
 for (size_t i = 0; i < array.vectorsCount(); ++i) {
     array.vector(i) = int_v::IndexesFromZero() + i * int_v::Size;
 }
 * \endcode
 * This code allocates a small array with 11 scalar entries
 * and implements two equivalent loops that initialize the memory.
 * The scalar loop writes each individual int. The vectorized loop writes int_v::Size values to
 * memory per iteration. Since the size of 11 is not a multiple of int_v::Size (unless you use the
 * scalar Vc implementation) the last write access of the vector loop would normally be out of
 * bounds. But the Memory class automatically pads the memory such that the whole array can be
 * accessed with correctly aligned memory addresses.
 * (Note: the scalar loop can be auto-vectorized, except for the last three assignments.)
 *
 * \note The internal data pointer is not declared with the \c __restrict__ keyword. Therefore
 * modifying memory of V::EntryType will require the compiler to assume aliasing. If you want to use
 * the \c __restrict__ keyword you need to use a standard pointer to memory and do the vector
 * address calculation and loads and stores manually.
 *
 * \param V The vector type you want to operate on. (e.g. float_v or uint_v)
 *
 * \see Memory<V, Size>
 *
 * \ingroup Containers
 * \headerfile memory.h <Vc/Memory>
 */
template<typename V> class Memory<V, 0u, 0u, true> : public MemoryBase<V, Memory<V, 0u, 0u, true>, 1, void>
{
public:
    typedef typename V::EntryType EntryType;

private:
    typedef MemoryBase<V, Memory<V>, 1, void> Base;
    friend class MemoryBase<V, Memory<V>, 1, void>;
    friend class MemoryDimensionBase<V, Memory<V>, 1, void>;
    enum InternalConstants {
        Alignment = V::Size,
        AlignmentMask = Alignment - 1
    };
    size_t m_entriesCount;
    size_t m_vectorsCount;
    EntryType *m_mem;
    size_t calcPaddedEntriesCount(size_t x)
    {
        size_t masked = x & AlignmentMask;
        return (masked == 0 ? x : x + (Alignment - masked));
    }

public:
    using Base::vector;

    /**
     * Allocate enough memory to access \p size values of type \p V::EntryType.
     *
     * The allocated memory is aligned and padded correctly for fully vectorized access.
     *
     * \param size Determines how many scalar values will fit into the allocated memory.
     */
    Vc_ALWAYS_INLINE Memory(size_t size)
        : m_entriesCount(size),
          m_vectorsCount(calcPaddedEntriesCount(m_entriesCount)),
          m_mem(Vc::malloc<EntryType, Vc::AlignOnVector>(m_vectorsCount))
    {
        m_vectorsCount /= V::Size;
        Base::lastVector() = V::Zero();
    }

    /**
     * Copy the memory into a new memory area.
     *
     * The allocated memory is aligned and padded correctly for fully vectorized access.
     *
     * \param rhs The Memory object to copy from.
     */
    template<typename Parent, typename RM>
    Vc_ALWAYS_INLINE Memory(const MemoryBase<V, Parent, 1, RM> &rhs)
        : m_entriesCount(rhs.entriesCount()),
          m_vectorsCount(rhs.vectorsCount()),
          m_mem(Vc::malloc<EntryType, Vc::AlignOnVector>(m_vectorsCount * V::Size))
    {
        Detail::copyVectors(*this, rhs);
    }

    /**
     * Overload of the above function.
     *
     * (Because C++ would otherwise not use the templated copy constructor and would
     * generate a defaulted copy constructor instead.)
     *
     * \param rhs The Memory object to copy from.
     */
    Vc_ALWAYS_INLINE Memory(const Memory &rhs)
        : m_entriesCount(rhs.entriesCount()),
          m_vectorsCount(rhs.vectorsCount()),
          m_mem(Vc::malloc<EntryType, Vc::AlignOnVector>(m_vectorsCount * V::Size))
    {
        Detail::copyVectors(*this, rhs);
    }

    /**
     * Frees the memory which was allocated in the constructor.
     */
    Vc_ALWAYS_INLINE ~Memory()
    {
        Vc::free(m_mem);
    }

    /**
     * Swap the contents and size information of two Memory objects.
     *
     * \param rhs The other Memory object to swap.
     */
    inline void swap(Memory &rhs) {
        std::swap(m_mem, rhs.m_mem);
        std::swap(m_entriesCount, rhs.m_entriesCount);
        std::swap(m_vectorsCount, rhs.m_vectorsCount);
    }

    /**
     * \return the number of scalar entries in the whole array.
     */
    Vc_ALWAYS_INLINE Vc_PURE size_t entriesCount() const { return m_entriesCount; }

    /**
     * \return the number of vectors in the whole array.
     */
    Vc_ALWAYS_INLINE Vc_PURE size_t vectorsCount() const { return m_vectorsCount; }

    /**
     * Overwrite all entries with the values stored in \p rhs.
     *
     * \param rhs The object to copy the data from.
     *
     * \return reference to the modified Memory object.
     *
     * \note this function requires the vectorsCount() of both Memory objects to be equal.
     */
    template<typename Parent, typename RM>
    Vc_ALWAYS_INLINE Memory &operator=(const MemoryBase<V, Parent, 1, RM> &rhs) {
        assert(vectorsCount() == rhs.vectorsCount());
        Detail::copyVectors(*this, rhs);
        return *this;
    }

    Vc_ALWAYS_INLINE Memory &operator=(const Memory &rhs) {
        assert(vectorsCount() == rhs.vectorsCount());
        Detail::copyVectors(*this, rhs);
        return *this;
    }

    /**
     * Overwrite all entries with the values stored in the memory at \p rhs.
     *
     * \param rhs The array to copy the data from.
     *
     * \return reference to the modified Memory object.
     *
     * \note this function requires that there are entriesCount() many values accessible from \p rhs.
     */
    Vc_ALWAYS_INLINE Memory &operator=(const EntryType *rhs) {
        std::memcpy(m_mem, rhs, entriesCount() * sizeof(EntryType));
        return *this;
    }
};

/**
 * Prefetch the cacheline containing \p addr for a single read access.
 *
 * This prefetch completely bypasses the cache, not evicting any other data.
 *
 * \param addr The cacheline containing \p addr will be prefetched.
 *
 * \ingroup Utilities
 * \headerfile memory.h <Vc/Memory>
 */
Vc_ALWAYS_INLINE void prefetchForOneRead(const void *addr)
{
    Vc::Detail::prefetchForOneRead(addr, VectorAbi::Best<float>());
}

/**
 * Prefetch the cacheline containing \p addr for modification.
 *
 * This prefetch evicts data from the cache. So use it only for data you really will use. When the
 * target system supports it the cacheline will be marked as modified while prefetching, saving work
 * later on.
 *
 * \param addr The cacheline containing \p addr will be prefetched.
 *
 * \ingroup Utilities
 * \headerfile memory.h <Vc/Memory>
 */
Vc_ALWAYS_INLINE void prefetchForModify(const void *addr)
{
    Vc::Detail::prefetchForModify(addr, VectorAbi::Best<float>());
}

/**
 * Prefetch the cacheline containing \p addr to L1 cache.
 *
 * This prefetch evicts data from the cache. So use it only for data you really will use.
 *
 * \param addr The cacheline containing \p addr will be prefetched.
 *
 * \ingroup Utilities
 * \headerfile memory.h <Vc/Memory>
 */
Vc_ALWAYS_INLINE void prefetchClose(const void *addr)
{
    Vc::Detail::prefetchClose(addr, VectorAbi::Best<float>());
}

/**
 * Prefetch the cacheline containing \p addr to L2 cache.
 *
 * This prefetch evicts data from the cache. So use it only for data you really will use.
 *
 * \param addr The cacheline containing \p addr will be prefetched.
 *
 * \ingroup Utilities
 * \headerfile memory.h <Vc/Memory>
 */
Vc_ALWAYS_INLINE void prefetchMid(const void *addr)
{
    Vc::Detail::prefetchMid(addr, VectorAbi::Best<float>());
}

/**
 * Prefetch the cacheline containing \p addr to L3 cache.
 *
 * This prefetch evicts data from the cache. So use it only for data you really will use.
 *
 * \param addr The cacheline containing \p addr will be prefetched.
 *
 * \ingroup Utilities
 * \headerfile memory.h <Vc/Memory>
 */
Vc_ALWAYS_INLINE void prefetchFar(const void *addr)
{
    Vc::Detail::prefetchFar(addr, VectorAbi::Best<float>());
}
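// A hedged usage sketch (an annotation; `p` and `n` are hypothetical names, and
// the stride of two vectors ahead is a typical starting point, not a tuned value):
//
//   float *p = ...;                  // data about to be traversed linearly
//   for (size_t i = 0; i < n; i += float_v::Size) {
//       Vc::prefetchClose(p + i + 2 * float_v::Size);  // warm L1 a bit ahead
//       float_v v(p + i);
//       v *= 2.f;
//       v.store(p + i);
//   }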
} // namespace Common

using Common::Memory;
using Common::prefetchForOneRead;
using Common::prefetchForModify;
using Common::prefetchClose;
using Common::prefetchMid;
using Common::prefetchFar;
} // namespace Vc

namespace std
{
template<typename V> Vc_ALWAYS_INLINE void swap(Vc::Memory<V> &a, Vc::Memory<V> &b) { a.swap(b); }
} // namespace std

#endif // VC_COMMON_MEMORY_H_
@ -0,0 +1,819 @@
/* This file is part of the Vc library. {{{
Copyright © 2009-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_COMMON_MEMORYBASE_H_
#define VC_COMMON_MEMORYBASE_H_

#include <assert.h>
#include <type_traits>
#include <iterator>
#include "macros.h"

namespace Vc_VERSIONED_NAMESPACE
{
namespace Common
{

#define Vc_MEM_OPERATOR_EQ(op) \
        template<typename T> \
        Vc_ALWAYS_INLINE enable_if_mutable<T, MemoryVector &> operator op##=(const T &x) { \
            const V v = value() op x; \
            v.store(&m_data[0], Flags()); \
            return *this; \
        }
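// For readability, this is (conceptually) what Vc_MEM_OPERATOR_EQ(+) expands to
// inside MemoryVector below: a compound assignment that loads the wrapped
// memory as a vector, applies the operator, and stores the result back
// (annotation only, not part of the original source):
//
//   template <typename T>
//   Vc_ALWAYS_INLINE enable_if_mutable<T, MemoryVector &> operator+=(const T &x) {
//       const V v = value() + x;      // vector load, then the actual arithmetic
//       v.store(&m_data[0], Flags()); // write the result back to the wrapped memory
//       return *this;
//   }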
/*dox{{{*/
/**
 * Helper class for the Memory::vector(size_t) class of functions.
 *
 * You will never need to directly make use of this class. It is an implementation detail of the
 * Memory API.
 *
 * \headerfile memorybase.h <Vc/Memory>
 *//*}}}*/
template<typename _V, typename Flags> class MemoryVector/*{{{*/
{
    typedef typename std::remove_cv<_V>::type V;

    template<typename T, typename R> using enable_if_mutable =
        typename std::enable_if<std::is_same<T, T>::value && !std::is_const<_V>::value, R>::type;

    using EntryType =
        typename std::conditional<std::is_const<_V>::value, const typename V::EntryType,
                                  typename V::EntryType>::type;
    typedef typename V::Mask Mask;

    EntryType m_data[V::Size];

public:
    // It is important that neither initialization nor cleanup is done as MemoryVector aliases
    // other memory
    Vc_INTRINSIC MemoryVector() = default;

    // disable copies because this type is supposed to alias the data in a Memory object,
    // nothing else
    MemoryVector(const MemoryVector &) = delete;
    MemoryVector(MemoryVector &&) = delete;
    // Do not disable MemoryVector &operator=(const MemoryVector &) = delete; because it is
    // covered nicely by the operator= below.

    //! \internal
    Vc_ALWAYS_INLINE Vc_PURE V value() const { return V(&m_data[0], Flags()); }

    /**
     * Cast to \p V operator.
     *
     * This function allows assigning this object to any object of type \p V.
     */
    Vc_ALWAYS_INLINE Vc_PURE operator V() const { return value(); }

    template<typename T>
    Vc_ALWAYS_INLINE enable_if_mutable<T, MemoryVector &> operator=(const T &x) {
        V v;
        v = x;
        v.store(&m_data[0], Flags());
        return *this;
    }

    Vc_ALL_BINARY(Vc_MEM_OPERATOR_EQ);
    Vc_ALL_ARITHMETICS(Vc_MEM_OPERATOR_EQ);

    Vc_ALWAYS_INLINE EntryType &operator[](size_t i) { return m_data[i]; }
    Vc_ALWAYS_INLINE const EntryType &operator[](size_t i) const { return m_data[i]; }
};

template<typename _V, typename Flags> class MemoryVectorIterator
{
    typedef typename std::remove_cv<_V>::type V;

    template<typename T, typename R> using enable_if_mutable =
        typename std::enable_if<std::is_same<T, T>::value && !std::is_const<_V>::value, R>::type;

    using iterator_traits = std::iterator_traits<MemoryVector<_V, Flags> *>;

    MemoryVector<_V, Flags> *d;
public:
    typedef typename iterator_traits::difference_type difference_type;
    typedef typename iterator_traits::value_type value_type;
    typedef typename iterator_traits::pointer pointer;
    typedef typename iterator_traits::reference reference;
    typedef typename iterator_traits::iterator_category iterator_category;

    constexpr MemoryVectorIterator(MemoryVector<_V, Flags> *dd) : d(dd) {}
    constexpr MemoryVectorIterator(const MemoryVectorIterator &) = default;
    constexpr MemoryVectorIterator(MemoryVectorIterator &&) = default;
    Vc_ALWAYS_INLINE MemoryVectorIterator &operator=(const MemoryVectorIterator &) = default;

    Vc_ALWAYS_INLINE void *orderBy() const { return d; }

    Vc_ALWAYS_INLINE difference_type operator-(const MemoryVectorIterator &rhs) const { return d - rhs.d; }
    Vc_ALWAYS_INLINE reference operator[](size_t i) const { return d[i]; }
    Vc_ALWAYS_INLINE reference operator*() const { return *d; }
    Vc_ALWAYS_INLINE pointer operator->() const { return d; }
    Vc_ALWAYS_INLINE MemoryVectorIterator &operator++() { ++d; return *this; }
    Vc_ALWAYS_INLINE MemoryVectorIterator operator++(int) { MemoryVectorIterator r(*this); ++d; return r; }
    Vc_ALWAYS_INLINE MemoryVectorIterator &operator--() { --d; return *this; }
    Vc_ALWAYS_INLINE MemoryVectorIterator operator--(int) { MemoryVectorIterator r(*this); --d; return r; }
    Vc_ALWAYS_INLINE MemoryVectorIterator &operator+=(size_t n) { d += n; return *this; }
    Vc_ALWAYS_INLINE MemoryVectorIterator &operator-=(size_t n) { d -= n; return *this; }
    Vc_ALWAYS_INLINE MemoryVectorIterator operator+(size_t n) const { return MemoryVectorIterator(d + n); }
    Vc_ALWAYS_INLINE MemoryVectorIterator operator-(size_t n) const { return MemoryVectorIterator(d - n); }
};

template<typename V, typename FlagsL, typename FlagsR>
Vc_ALWAYS_INLINE bool operator==(const MemoryVectorIterator<V, FlagsL> &l, const MemoryVectorIterator<V, FlagsR> &r)
{
    return l.orderBy() == r.orderBy();
}
template<typename V, typename FlagsL, typename FlagsR>
Vc_ALWAYS_INLINE bool operator!=(const MemoryVectorIterator<V, FlagsL> &l, const MemoryVectorIterator<V, FlagsR> &r)
{
    return l.orderBy() != r.orderBy();
}
template<typename V, typename FlagsL, typename FlagsR>
Vc_ALWAYS_INLINE bool operator>=(const MemoryVectorIterator<V, FlagsL> &l, const MemoryVectorIterator<V, FlagsR> &r)
{
    return l.orderBy() >= r.orderBy();
}
template<typename V, typename FlagsL, typename FlagsR>
Vc_ALWAYS_INLINE bool operator<=(const MemoryVectorIterator<V, FlagsL> &l, const MemoryVectorIterator<V, FlagsR> &r)
{
    return l.orderBy() <= r.orderBy();
}
template<typename V, typename FlagsL, typename FlagsR>
Vc_ALWAYS_INLINE bool operator> (const MemoryVectorIterator<V, FlagsL> &l, const MemoryVectorIterator<V, FlagsR> &r)
{
    return l.orderBy() >  r.orderBy();
}
template<typename V, typename FlagsL, typename FlagsR>
Vc_ALWAYS_INLINE bool operator< (const MemoryVectorIterator<V, FlagsL> &l, const MemoryVectorIterator<V, FlagsR> &r)
{
    return l.orderBy() <  r.orderBy();
}
/*}}}*/
#undef Vc_MEM_OPERATOR_EQ

#define Vc_VPH_OPERATOR(op)                                                               \
    template <typename V1, typename Flags1, typename V2, typename Flags2>                 \
    decltype(std::declval<V1>() op std::declval<V2>()) operator op(                       \
        const MemoryVector<V1, Flags1> &x, const MemoryVector<V2, Flags2> &y)             \
    {                                                                                     \
        return x.value() op y.value();                                                    \
    }
Vc_ALL_ARITHMETICS(Vc_VPH_OPERATOR);
Vc_ALL_BINARY     (Vc_VPH_OPERATOR);
Vc_ALL_COMPARES   (Vc_VPH_OPERATOR);
#undef Vc_VPH_OPERATOR

template<typename V, typename Parent, typename Flags = Prefetch<>> class MemoryRange/*{{{*/
{
    Parent *m_parent;
    size_t m_first;
    size_t m_last;

public:
    MemoryRange(Parent *p, size_t firstIndex, size_t lastIndex)
        : m_parent(p), m_first(firstIndex), m_last(lastIndex)
    {}

    MemoryVectorIterator<V, Flags> begin() const { return &m_parent->vector(m_first    , Flags()); }
    MemoryVectorIterator<V, Flags> end()   const { return &m_parent->vector(m_last + 1, Flags()); }
};/*}}}*/
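// Illustrative sketch (assuming a Memory object `mem` as above): MemoryRange and
// MemoryVectorIterator together enable vector-wise range-based for loops:
//     for (auto &v : mem.range(0, mem.vectorsCount() - 1)) {
//         v += Vc::float_v::One();                 // v references one MemoryVector
//     }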
template<typename V, typename Parent, int Dimension, typename RowMemory> class MemoryDimensionBase;
template<typename V, typename Parent, typename RowMemory> class MemoryDimensionBase<V, Parent, 1, RowMemory> // {{{1
{
    private:
        Parent *p() { return static_cast<Parent *>(this); }
        const Parent *p() const { return static_cast<const Parent *>(this); }
    public:
        /**
         * The type of the scalar entries in the array.
         */
        typedef typename V::EntryType EntryType;

        /**
         * Returns a pointer to the start of the allocated memory.
         */
        Vc_ALWAYS_INLINE Vc_PURE EntryType *entries() { return &p()->m_mem[0]; }
        /// Const overload of the above function.
        Vc_ALWAYS_INLINE Vc_PURE const EntryType *entries() const { return &p()->m_mem[0]; }

        /**
         * Returns the \p i-th scalar value in the memory.
         */
        Vc_ALWAYS_INLINE Vc_PURE EntryType &scalar(size_t i) { return entries()[i]; }
        /// Const overload of the above function.
        Vc_ALWAYS_INLINE Vc_PURE const EntryType scalar(size_t i) const { return entries()[i]; }

#ifdef DOXYGEN
        /**
         * Cast operator to the scalar type. This allows the object to be used much like a
         * standard C array.
         */
        Vc_ALWAYS_INLINE Vc_PURE operator       EntryType*()       { return entries(); }
        /// Const overload of the above function.
        Vc_ALWAYS_INLINE Vc_PURE operator const EntryType*() const { return entries(); }
#else
        // The above conversion operator allows implicit conversion to bool. To prohibit this
        // conversion we use SFINAE to allow only conversion to EntryType* and void*.
        template <typename T,
                  typename std::enable_if<
                      std::is_same<typename std::remove_const<T>::type, EntryType *>::value ||
                          std::is_same<typename std::remove_const<T>::type, void *>::value,
                      int>::type = 0>
        Vc_ALWAYS_INLINE Vc_PURE operator T()
        {
            return entries();
        }
        template <typename T,
                  typename std::enable_if<std::is_same<T, const EntryType *>::value ||
                                              std::is_same<T, const void *>::value,
                                          int>::type = 0>
        Vc_ALWAYS_INLINE Vc_PURE operator T() const
        {
            return entries();
        }
#endif

        /**
         * Returns a proxy object for iterating over the vectors from \p firstIndex to
         * \p lastIndex (inclusive), optionally with the given load/store \p Flags.
         */
        template<typename Flags>
        Vc_ALWAYS_INLINE MemoryRange<V, Parent, Flags> range(size_t firstIndex, size_t lastIndex, Flags) {
            return MemoryRange<V, Parent, Flags>(p(), firstIndex, lastIndex);
        }
        Vc_ALWAYS_INLINE MemoryRange<V, Parent> range(size_t firstIndex, size_t lastIndex) {
            return MemoryRange<V, Parent>(p(), firstIndex, lastIndex);
        }
        template<typename Flags>
        Vc_ALWAYS_INLINE MemoryRange<const V, Parent, Flags> range(size_t firstIndex, size_t lastIndex, Flags) const {
            return MemoryRange<const V, Parent, Flags>(p(), firstIndex, lastIndex);
        }
        Vc_ALWAYS_INLINE MemoryRange<const V, Parent> range(size_t firstIndex, size_t lastIndex) const {
            return MemoryRange<const V, Parent>(p(), firstIndex, lastIndex);
        }

        /**
         * Returns the \p i-th scalar value in the memory.
         */
        Vc_ALWAYS_INLINE EntryType &operator[](size_t i) { return entries()[i]; }
        /// Const overload of the above function.
        Vc_ALWAYS_INLINE const EntryType &operator[](size_t i) const { return entries()[i]; }

        /**
         * Uses a vector gather to combine the entries at the indexes in \p i into the returned
         * vector object.
         *
         * \param i An integer vector. It determines the entries to be gathered.
         * \returns A vector object. Modification of this object will not modify the values in
         *          memory.
         *
         * \warning The API of this function might change in future versions of Vc to additionally
         *          support scatters.
         */
        template<typename IndexT> Vc_ALWAYS_INLINE Vc_PURE V operator[](Vector<IndexT> i) const
        {
            return V(entries(), i);
        }
};
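// Illustrative gather through the subscript overload above (an assumption-laden
// sketch; it presumes Vc::int_v and Vc::float_v have equal width):
//     Vc::int_v idx(Vc::IndexesFromZero);
//     Vc::float_v gathered = mem[idx * 2];         // reads mem[0], mem[2], mem[4], ...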
template<typename V, typename Parent, typename RowMemory> class MemoryDimensionBase<V, Parent, 2, RowMemory> // {{{1
{
    private:
        Parent *p() { return static_cast<Parent *>(this); }
        const Parent *p() const { return static_cast<const Parent *>(this); }
    public:
        /**
         * The type of the scalar entries in the array.
         */
        typedef typename V::EntryType EntryType;

        static constexpr size_t rowCount() { return Parent::RowCount; }

        /**
         * Returns a pointer to the start of the allocated memory.
         */
        Vc_ALWAYS_INLINE Vc_PURE EntryType *entries(size_t x = 0) { return &p()->m_mem[x][0]; }
        /// Const overload of the above function.
        Vc_ALWAYS_INLINE Vc_PURE const EntryType *entries(size_t x = 0) const { return &p()->m_mem[x][0]; }

        /**
         * Returns the \p i,j-th scalar value in the memory.
         */
        Vc_ALWAYS_INLINE Vc_PURE EntryType &scalar(size_t i, size_t j) { return entries(i)[j]; }
        /// Const overload of the above function.
        Vc_ALWAYS_INLINE Vc_PURE const EntryType scalar(size_t i, size_t j) const { return entries(i)[j]; }

        /**
         * Returns the \p i-th row in the memory.
         */
        Vc_ALWAYS_INLINE Vc_PURE RowMemory &operator[](size_t i) {
            return p()->m_mem[i];
        }
        /// Const overload of the above function.
        Vc_ALWAYS_INLINE Vc_PURE const RowMemory &operator[](size_t i) const {
            return p()->m_mem[i];
        }

        /**
         * \return the number of rows in the array.
         *
         * \note This function can be eliminated by an optimizing compiler.
         */
        Vc_ALWAYS_INLINE Vc_PURE size_t rowsCount() const { return p()->rowsCount(); }
};
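// Illustrative 2D access (a sketch; the row/column order is an assumption):
//     Vc::Memory<Vc::float_v, 8, 16> m2d;          // 8 rows of 16 entries each
//     m2d[3][5] = 1.f;                             // operator[] row proxy, then scalar
//     m2d.scalar(3, 5) = 2.f;                      // equivalent via scalar(i, j)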

//dox{{{1
/**
 * \headerfile memorybase.h <Vc/Memory>
 *
 * Common interface to all Memory classes, independent of allocation on the stack or heap.
 *
 * \param V The vector type you want to operate on. (e.g. float_v or uint_v)
 * \param Parent This type is the complete type of the class that derives from MemoryBase.
 * \param Dimension The number of dimensions the implementation provides.
 * \param RowMemory Class to be used to work on a single row.
 */
template<typename V, typename Parent, int Dimension, typename RowMemory> class MemoryBase : public MemoryDimensionBase<V, Parent, Dimension, RowMemory> //{{{1
{
    static_assert((V::size() * sizeof(typename V::EntryType)) % V::MemoryAlignment == 0,
                  "Vc::Memory can only be used for data-parallel types storing a number "
                  "of values that's a multiple of the memory alignment.");

private:
    Parent *p() { return static_cast<Parent *>(this); }
    const Parent *p() const { return static_cast<const Parent *>(this); }

    template <class Flags>
    using vector_reference = MayAlias<MemoryVector<V, Flags>> &;
    template <class Flags>
    using const_vector_reference = const MayAlias<MemoryVector<const V, Flags>> &;

public:
    /**
     * The type of the scalar entries in the array.
     */
    typedef typename V::EntryType EntryType;

    /**
     * \return the number of scalar entries in the array. This function is optimized away
     * if a constant size array is used.
     */
    Vc_ALWAYS_INLINE Vc_PURE size_t entriesCount() const { return p()->entriesCount(); }
    /**
     * \return the number of vector entries that span the array. This function is optimized away
     * if a constant size array is used.
     */
    Vc_ALWAYS_INLINE Vc_PURE size_t vectorsCount() const { return p()->vectorsCount(); }

    using MemoryDimensionBase<V, Parent, Dimension, RowMemory>::entries;
    using MemoryDimensionBase<V, Parent, Dimension, RowMemory>::scalar;

    /**
     * Return a (vectorized) iterator to the start of this memory object.
     */
    template<typename Flags = AlignedTag>
    Vc_ALWAYS_INLINE MemoryVectorIterator<      V, Flags> begin(Flags flags = Flags())       { return &firstVector(flags); }
    //! const overload of the above
    template<typename Flags = AlignedTag>
    Vc_ALWAYS_INLINE MemoryVectorIterator<const V, Flags> begin(Flags flags = Flags()) const { return &firstVector(flags); }

    /**
     * Return a (vectorized) iterator to the end of this memory object.
     */
    template<typename Flags = AlignedTag>
    Vc_ALWAYS_INLINE MemoryVectorIterator<      V, Flags> end(Flags flags = Flags())       { return &lastVector(flags) + 1; }
    //! const overload of the above
    template<typename Flags = AlignedTag>
    Vc_ALWAYS_INLINE MemoryVectorIterator<const V, Flags> end(Flags flags = Flags()) const { return &lastVector(flags) + 1; }

    /**
     * \param i Selects the offset where the vector should be read.
     *
     * \return a smart object to wrap the \p i-th vector in the memory.
     *
     * The return value can be used as any other vector object. That is, you can substitute
     * something like
     * \code
     * float_v a = ..., b = ...;
     * a += b;
     * \endcode
     * with
     * \code
     * mem.vector(i) += b;
     * \endcode
     *
     * This function ensures that only \em aligned loads and stores are used. Thus it only allows
     * access to memory at fixed strides. If access at known offsets from the aligned vectors is
     * needed, the vector(size_t, int) function can be used.
     */
    template <typename Flags = AlignedTag>
    Vc_ALWAYS_INLINE Vc_PURE
        typename std::enable_if<!std::is_convertible<Flags, int>::value,
                                vector_reference<Flags>>::type
        vector(size_t i, Flags = Flags())
    {
        return *aliasing_cast<MemoryVector<V, Flags>>(&entries()[i * V::Size]);
    }
    /** \brief Const overload of the above function
     *
     * \param i Selects the offset where the vector should be read.
     *
     * \return a smart object to wrap the \p i-th vector in the memory.
     */
    template <typename Flags = AlignedTag>
    Vc_ALWAYS_INLINE Vc_PURE
        typename std::enable_if<!std::is_convertible<Flags, int>::value,
                                const_vector_reference<Flags>>::type
        vector(size_t i, Flags = Flags()) const
    {
        return *aliasing_cast<MemoryVector<const V, Flags>>(&entries()[i * V::Size]);
    }

    /**
     * \return a smart object to wrap the vector starting from the \p i-th scalar entry in the memory.
     *
     * Example:
     * \code
     * Memory<float_v, N> mem;
     * mem.setZero();
     * for (int i = 0; i < mem.entriesCount(); i += float_v::Size) {
     *     mem.vectorAt(i) += b;
     * }
     * \endcode
     *
     * \param i Specifies the scalar entry from where the vector will be loaded/stored. I.e. the
     * values scalar(i), scalar(i + 1), ..., scalar(i + V::Size - 1) will be read/overwritten.
     *
     * \param flags You must take care to determine whether an unaligned load/store is
     * required. Per default an unaligned load/store is used. If \p i is a multiple of \c V::Size
     * you may want to pass Vc::Aligned here.
     */
    template <typename Flags = UnalignedTag>
    Vc_ALWAYS_INLINE Vc_PURE vector_reference<Flags> vectorAt(size_t i,
                                                              Flags flags = Flags())
    {
        return *aliasing_cast<MemoryVector<V, Flags>>(&entries()[i]);
    }
    /** \brief Const overload of the above function
     *
     * \return a smart object to wrap the vector starting from the \p i-th scalar entry in the memory.
     *
     * \param i Specifies the scalar entry from where the vector will be loaded/stored. I.e. the
     * values scalar(i), scalar(i + 1), ..., scalar(i + V::Size - 1) will be read/overwritten.
     *
     * \param flags You must take care to determine whether an unaligned load/store is
     * required. Per default an unaligned load/store is used. If \p i is a multiple of \c V::Size
     * you may want to pass Vc::Aligned here.
     */
    template <typename Flags = UnalignedTag>
    Vc_ALWAYS_INLINE Vc_PURE const_vector_reference<Flags> vectorAt(
        size_t i, Flags flags = Flags()) const
    {
        return *aliasing_cast<MemoryVector<const V, Flags>>(&entries()[i]);
    }

    /**
     * \return a smart object to wrap the \p i-th vector + \p shift in the memory.
     *
     * This function ensures that only \em unaligned loads and stores are used.
     * It allows access to memory at any location aligned to the entry type.
     *
     * \param i Selects the memory location of the i-th vector. Thus if \p V::Size == 4 and
     *          \p i is set to 3 the base address for the load/store will be the 12th entry
     *          (same as \p &mem[12]).
     * \param shift Shifts the base address determined by parameter \p i by \p shift many
     *              entries. Thus \p vector(3, 1) for \p V::Size == 4 will load/store the
     *              13th - 16th entries (same as \p &mem[13]).
     *
     * \note Any shift value is allowed as long as you make sure it stays within bounds of the
     * allocated memory. Shift values that are a multiple of \p V::Size will \em not result in
     * aligned loads. You have to use the above vector(size_t) function for aligned loads
     * instead.
     *
     * \note Thus a simple way to access vectors randomly is to set \p i to 0 and use \p shift as the
     * parameter to select the memory address:
     * \code
     * // don't use:
     * mem.vector(i / V::Size, i % V::Size) += 1;
     * // instead use:
     * mem.vector(0, i) += 1;
     * \endcode
     */
    template <typename ShiftT, typename Flags = decltype(Unaligned)>
    Vc_ALWAYS_INLINE Vc_PURE typename std::enable_if<
        std::is_convertible<ShiftT, int>::value,
        vector_reference<decltype(std::declval<Flags>() | Unaligned)>>::type
    vector(size_t i, ShiftT shift, Flags = Flags())
    {
        return *aliasing_cast<
            MemoryVector<V, decltype(std::declval<Flags>() | Unaligned)>>(
            &entries()[i * V::Size + shift]);
    }
    /// Const overload of the above function.
    template <typename ShiftT, typename Flags = decltype(Unaligned)>
    Vc_ALWAYS_INLINE Vc_PURE typename std::enable_if<
        std::is_convertible<ShiftT, int>::value,
        const_vector_reference<decltype(std::declval<Flags>() | Unaligned)>>::type
    vector(size_t i, ShiftT shift, Flags = Flags()) const
    {
        return *aliasing_cast<
            MemoryVector<const V, decltype(std::declval<Flags>() | Unaligned)>>(
            &entries()[i * V::Size + shift]);
    }

    /**
     * \return the first vector in the allocated memory.
     *
     * This function is simply a shorthand for vector(0).
     */
    template <typename Flags = AlignedTag>
    Vc_ALWAYS_INLINE Vc_PURE vector_reference<Flags> firstVector(Flags f = Flags())
    {
        return vector(0, f);
    }
    /// Const overload of the above function.
    template <typename Flags = AlignedTag>
    Vc_ALWAYS_INLINE Vc_PURE const_vector_reference<Flags> firstVector(
        Flags f = Flags()) const
    {
        return vector(0, f);
    }

    /**
     * \return the last vector in the allocated memory.
     *
     * This function is simply a shorthand for vector(vectorsCount() - 1).
     */
    template <typename Flags = AlignedTag>
    Vc_ALWAYS_INLINE Vc_PURE vector_reference<Flags> lastVector(Flags f = Flags())
    {
        return vector(vectorsCount() - 1, f);
    }
    /// Const overload of the above function.
    template <typename Flags = AlignedTag>
    Vc_ALWAYS_INLINE Vc_PURE const_vector_reference<Flags> lastVector(
        Flags f = Flags()) const
    {
        return vector(vectorsCount() - 1, f);
    }

    Vc_ALWAYS_INLINE Vc_PURE V gather(const unsigned char  *indexes) const { return V(entries(), typename V::IndexType(indexes, Vc::Unaligned)); }
    Vc_ALWAYS_INLINE Vc_PURE V gather(const unsigned short *indexes) const { return V(entries(), typename V::IndexType(indexes, Vc::Unaligned)); }
    Vc_ALWAYS_INLINE Vc_PURE V gather(const unsigned int   *indexes) const { return V(entries(), typename V::IndexType(indexes, Vc::Unaligned)); }
    Vc_ALWAYS_INLINE Vc_PURE V gather(const unsigned long  *indexes) const { return V(entries(), typename V::IndexType(indexes, Vc::Unaligned)); }
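    // Illustrative use of the gather overloads above (assumes `mem` as before and
    // V::Size == 4):
    //     unsigned short idx[4] = {0, 3, 5, 7};
    //     Vc::float_v v = mem.gather(idx);         // v = {mem[0], mem[3], mem[5], mem[7]}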

    /**
     * Zero the whole memory area.
     */
    Vc_ALWAYS_INLINE void setZero() {
        V zero(Vc::Zero);
        for (size_t i = 0; i < vectorsCount(); ++i) {
            vector(i) = zero;
        }
    }

    /**
     * Assign a value to all vectors in the array.
     */
    template<typename U>
    Vc_ALWAYS_INLINE Parent &operator=(U &&x) {
        // assign from x in every iteration (not std::forward: forwarding an rvalue more
        // than once would read from a moved-from object)
        for (size_t i = 0; i < vectorsCount(); ++i) {
            vector(i) = x;
        }
        return static_cast<Parent &>(*this);
    }

    /**
     * (Inefficient) shorthand to add up two arrays.
     */
    template<typename P2, typename RM>
    inline Parent &operator+=(const MemoryBase<V, P2, Dimension, RM> &rhs) {
        assert(vectorsCount() == rhs.vectorsCount());
        for (size_t i = 0; i < vectorsCount(); ++i) {
            vector(i) += rhs.vector(i);
        }
        return static_cast<Parent &>(*this);
    }

    /**
     * (Inefficient) shorthand to subtract two arrays.
     */
    template<typename P2, typename RM>
    inline Parent &operator-=(const MemoryBase<V, P2, Dimension, RM> &rhs) {
        assert(vectorsCount() == rhs.vectorsCount());
        for (size_t i = 0; i < vectorsCount(); ++i) {
            vector(i) -= rhs.vector(i);
        }
        return static_cast<Parent &>(*this);
    }

    /**
     * (Inefficient) shorthand to multiply two arrays.
     */
    template<typename P2, typename RM>
    inline Parent &operator*=(const MemoryBase<V, P2, Dimension, RM> &rhs) {
        assert(vectorsCount() == rhs.vectorsCount());
        for (size_t i = 0; i < vectorsCount(); ++i) {
            vector(i) *= rhs.vector(i);
        }
        return static_cast<Parent &>(*this);
    }

    /**
     * (Inefficient) shorthand to divide two arrays.
     */
    template<typename P2, typename RM>
    inline Parent &operator/=(const MemoryBase<V, P2, Dimension, RM> &rhs) {
        assert(vectorsCount() == rhs.vectorsCount());
        for (size_t i = 0; i < vectorsCount(); ++i) {
            vector(i) /= rhs.vector(i);
        }
        return static_cast<Parent &>(*this);
    }

    /**
     * (Inefficient) shorthand to add a value to an array.
     */
    inline Parent &operator+=(EntryType rhs) {
        V v(rhs);
        for (size_t i = 0; i < vectorsCount(); ++i) {
            vector(i) += v;
        }
        return static_cast<Parent &>(*this);
    }

    /**
     * (Inefficient) shorthand to subtract a value from an array.
     */
    inline Parent &operator-=(EntryType rhs) {
        V v(rhs);
        for (size_t i = 0; i < vectorsCount(); ++i) {
            vector(i) -= v;
        }
        return static_cast<Parent &>(*this);
    }

    /**
     * (Inefficient) shorthand to multiply an array by a value.
     */
    inline Parent &operator*=(EntryType rhs) {
        V v(rhs);
        for (size_t i = 0; i < vectorsCount(); ++i) {
            vector(i) *= v;
        }
        return static_cast<Parent &>(*this);
    }

    /**
     * (Inefficient) shorthand to divide an array by a value.
     */
    inline Parent &operator/=(EntryType rhs) {
        V v(rhs);
        for (size_t i = 0; i < vectorsCount(); ++i) {
            vector(i) /= v;
        }
        return static_cast<Parent &>(*this);
    }

    /**
     * (Inefficient) shorthand to compare two arrays for equality.
     */
    template<typename P2, typename RM>
    inline bool operator==(const MemoryBase<V, P2, Dimension, RM> &rhs) const {
        assert(vectorsCount() == rhs.vectorsCount());
        for (size_t i = 0; i < vectorsCount(); ++i) {
            if (!(V(vector(i)) == V(rhs.vector(i))).isFull()) {
                return false;
            }
        }
        return true;
    }

    /**
     * (Inefficient) shorthand to compare two arrays.
     */
    template<typename P2, typename RM>
    inline bool operator!=(const MemoryBase<V, P2, Dimension, RM> &rhs) const {
        assert(vectorsCount() == rhs.vectorsCount());
        for (size_t i = 0; i < vectorsCount(); ++i) {
            if (!(V(vector(i)) == V(rhs.vector(i))).isEmpty()) {
                return false;
            }
        }
        return true;
    }

    /**
     * (Inefficient) shorthand to compare two arrays.
     */
    template<typename P2, typename RM>
    inline bool operator<(const MemoryBase<V, P2, Dimension, RM> &rhs) const {
        assert(vectorsCount() == rhs.vectorsCount());
        for (size_t i = 0; i < vectorsCount(); ++i) {
            if (!(V(vector(i)) < V(rhs.vector(i))).isFull()) {
                return false;
            }
        }
        return true;
    }

    /**
     * (Inefficient) shorthand to compare two arrays.
     */
    template<typename P2, typename RM>
    inline bool operator<=(const MemoryBase<V, P2, Dimension, RM> &rhs) const {
        assert(vectorsCount() == rhs.vectorsCount());
        for (size_t i = 0; i < vectorsCount(); ++i) {
            if (!(V(vector(i)) <= V(rhs.vector(i))).isFull()) {
                return false;
            }
        }
        return true;
    }

    /**
     * (Inefficient) shorthand to compare two arrays.
     */
    template<typename P2, typename RM>
    inline bool operator>(const MemoryBase<V, P2, Dimension, RM> &rhs) const {
        assert(vectorsCount() == rhs.vectorsCount());
        for (size_t i = 0; i < vectorsCount(); ++i) {
            if (!(V(vector(i)) > V(rhs.vector(i))).isFull()) {
                return false;
            }
        }
        return true;
    }

    /**
     * (Inefficient) shorthand to compare two arrays.
     */
    template<typename P2, typename RM>
    inline bool operator>=(const MemoryBase<V, P2, Dimension, RM> &rhs) const {
        assert(vectorsCount() == rhs.vectorsCount());
        for (size_t i = 0; i < vectorsCount(); ++i) {
            if (!(V(vector(i)) >= V(rhs.vector(i))).isFull()) {
                return false;
            }
        }
        return true;
    }
};

namespace Detail
{
template <typename V,
          typename ParentL,
          typename ParentR,
          int Dimension,
          typename RowMemoryL,
          typename RowMemoryR>
inline void copyVectors(MemoryBase<V, ParentL, Dimension, RowMemoryL> &dst,
                        const MemoryBase<V, ParentR, Dimension, RowMemoryR> &src)
{
    const size_t vectorsCount = dst.vectorsCount();
    size_t i = 3;
    for (; i < vectorsCount; i += 4) {
        const V tmp3 = src.vector(i - 3);
        const V tmp2 = src.vector(i - 2);
        const V tmp1 = src.vector(i - 1);
        const V tmp0 = src.vector(i - 0);
        dst.vector(i - 3) = tmp3;
        dst.vector(i - 2) = tmp2;
        dst.vector(i - 1) = tmp1;
        dst.vector(i - 0) = tmp0;
    }
    for (i -= 3; i < vectorsCount; ++i) {
        dst.vector(i) = src.vector(i);
    }
}
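// copyVectors moves four vectors per iteration, reading all four before storing
// any of them; the second loop (after `i -= 3` rewinds to the first uncopied
// index) handles the up-to-three vectors that remain at the tail.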
} // namespace Detail

} // namespace Common
} // namespace Vc

#endif // VC_COMMON_MEMORYBASE_H_

// vim: foldmethod=marker
@ -0,0 +1,46 @@
/* This file is part of the Vc library. {{{
Copyright © 2011-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_COMMON_MEMORYFWD_H_
#define VC_COMMON_MEMORYFWD_H_

namespace Vc_VERSIONED_NAMESPACE
{
namespace Common
{
template <typename V, std::size_t Size1 = 0, std::size_t Size2 = 0,
          bool InitPadding = true>
class Memory;

template <typename V, typename Parent, int Dimension, typename RowMemory>
class MemoryBase;
}  // namespace Common

using Common::Memory;
}  // namespace Vc

#endif  // VC_COMMON_MEMORYFWD_H_
@ -0,0 +1,258 @@
/* This file is part of the Vc library. {{{
Copyright © 2012-2016 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef COMMON_OPERATORS_H_
#define COMMON_OPERATORS_H_
#include "simdarray.h"
#include "macros.h"

namespace Vc_VERSIONED_NAMESPACE
{
namespace Detail
{
template <typename T, typename Abi, typename U>
enable_if<!std::is_same<T, U>::value, U> is_convertible_to_any_vector(Vector<U, Abi>);
template <typename T, typename Abi> T is_convertible_to_any_vector(Vector<T, Abi>);

template <typename T, typename U, bool = std::is_integral<T>::value,
          bool = std::is_integral<U>::value>
struct FundamentalReturnType;
template <class T, class U>
using fundamental_return_t = typename FundamentalReturnType<T, U>::type;

template <typename T, typename U> struct FundamentalReturnType<T, U, false, false> {
    using type = typename std::conditional<
        std::is_arithmetic<U>::value,
        typename std::conditional<(sizeof(T) < sizeof(U)), U, T>::type,
        // U is not arithmetic, e.g. an enum or a type with e.g. operator int()
        T>::type;
};
template <typename T, typename U> struct FundamentalReturnType<T, U, true, false> {
    using type = typename std::conditional<
        std::is_arithmetic<U>::value, U,
        // U is not arithmetic, e.g. an enum or a type with e.g. operator int()
        T>::type;
};
template <typename T, typename U> struct FundamentalReturnType<T, U, false, true> {
    using type = T;
};

template <typename T> struct my_make_signed : public std::make_signed<T> {
};
template <> struct my_make_signed<bool> {
    using type = bool;
};

template <typename TT, typename UU>
struct higher_conversion_rank {
    template <typename A>
    using fix_sign =
        typename std::conditional<(std::is_unsigned<TT>::value ||
                                   std::is_unsigned<UU>::value),
                                  typename std::make_unsigned<A>::type, A>::type;
    using T = typename my_make_signed<TT>::type;
    using U = typename my_make_signed<UU>::type;
    template <typename Test, typename Otherwise>
    using c = typename std::conditional<std::is_same<T, Test>::value ||
                                            std::is_same<U, Test>::value,
                                        Test, Otherwise>::type;

    using type = fix_sign<c<long long, c<long, c<int, c<short, c<signed char, void>>>>>>;
};

template <typename T, typename U> struct FundamentalReturnType<T, U, true, true> {
    template <bool B, class Then, class E>
    using c = typename std::conditional<B, Then, E>::type;
    using type =
        c<(sizeof(T) > sizeof(U)), T,
          c<(sizeof(T) < sizeof(U)), U, typename higher_conversion_rank<T, U>::type>>;
};
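// A few spot checks of the promotion rules above (static_asserts an editor added
// for illustration; they assume the usual x86 type sizes):
//     static_assert(std::is_same<fundamental_return_t<int, float>, float>::value, "");
//     static_assert(std::is_same<fundamental_return_t<short, short>, short>::value, "");
//     static_assert(std::is_same<fundamental_return_t<int, unsigned>, unsigned>::value, "");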

template <class V, class T, class Tq, class = void> struct ReturnTypeImpl {
    // no type => SFINAE
};
// 1. Vector × Vector
template <class T, class U, class Abi, class Uq>
struct ReturnTypeImpl<Vector<T, Abi>, Vector<U, Abi>, Uq, void> {
    using type = Vc::Vector<fundamental_return_t<T, U>, Abi>;
};
// 2. Vector × int
template <class T, class Abi, class Uq>
struct ReturnTypeImpl<Vector<T, Abi>, int, Uq, void> {
    // conversion from int is always allowed (because it's the default when you hardcode a
    // number)
    using type = Vc::Vector<T, Abi>;
};
// 3. Vector × unsigned
template <class T, class Abi, class Uq>
struct ReturnTypeImpl<Vector<T, Abi>, uint, Uq, void> {
    // conversion from unsigned int is allowed for all integral Vector<T>, but ensures an
    // unsigned result
    using type = Vc::Vector<
        typename std::conditional<std::is_integral<T>::value, std::make_unsigned<T>,
                                  std::enable_if<true, T>>::type::type,
        Abi>;
};
// 4. Vector × {enum, arithmetic}
template <class T, class U, class Abi, class Uq>
struct ReturnTypeImpl<
    Vector<T, Abi>, U, Uq,
    enable_if<!std::is_class<U>::value && !std::is_same<U, int>::value &&
                  !std::is_same<U, uint>::value &&
                  Traits::is_valid_vector_argument<fundamental_return_t<T, U>>::value,
              void>> {
    using type = Vc::Vector<fundamental_return_t<T, U>, Abi>;
};
// 5. Vector × UDT
template <class T, class U, class Abi, class Uq>
struct ReturnTypeImpl<
    Vector<T, Abi>, U, Uq,
    enable_if<std::is_class<U>::value && !Traits::is_simd_vector<U>::value &&
                  Traits::is_valid_vector_argument<decltype(
                      is_convertible_to_any_vector<T, Abi>(std::declval<Uq>()))>::value,
              void>> {
    using type =
        Vc::Vector<fundamental_return_t<T, decltype(is_convertible_to_any_vector<T, Abi>(
                                               std::declval<Uq>()))>,
                   Abi>;
};
template <class V, class Tq, class T = remove_cvref_t<Tq>>
using ReturnType = typename ReturnTypeImpl<V, T, Tq>::type;
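// Informal spot checks of the five cases (an editor's sketch):
//     ReturnType<Vc::float_v, int>     -> Vc::float_v            (case 2: int keeps T)
//     ReturnType<Vc::int_v, unsigned>  -> Vector<unsigned, Abi>  (case 3: unsigned result)
//     ReturnType<Vc::float_v, double>  -> Vector<double, Abi>    (case 4: promotion)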

template <class T> struct is_a_type : public std::true_type {
};

#ifdef Vc_ENABLE_FLOAT_BIT_OPERATORS
#define Vc_TEST_FOR_BUILTIN_OPERATOR(op_) true
#else
#define Vc_TEST_FOR_BUILTIN_OPERATOR(op_)                                                \
    Detail::is_a_type<decltype(std::declval<typename R::value_type>()                    \
                                   op_ std::declval<typename R::value_type>())>::value
#endif
}  // namespace Detail

#define Vc_GENERIC_OPERATOR(op_)                                                         \
    template <class T, class Abi, class U,                                               \
              class R = Detail::ReturnType<Vector<T, Abi>, U>>                           \
    Vc_ALWAYS_INLINE enable_if<Vc_TEST_FOR_BUILTIN_OPERATOR(op_) &&                      \
                                   std::is_convertible<Vector<T, Abi>, R>::value &&      \
                                   std::is_convertible<U, R>::value,                     \
                               R>                                                        \
    operator op_(Vector<T, Abi> x, U &&y)                                                \
    {                                                                                    \
        return Detail::operator op_(R(x), R(std::forward<U>(y)));                        \
    }                                                                                    \
    template <class T, class Abi, class U,                                               \
              class R = Detail::ReturnType<Vector<T, Abi>, U>>                           \
    Vc_ALWAYS_INLINE enable_if<Vc_TEST_FOR_BUILTIN_OPERATOR(op_) &&                      \
                                   !Traits::is_simd_vector<U>::value &&                  \
                                   std::is_convertible<Vector<T, Abi>, R>::value &&      \
                                   std::is_convertible<U, R>::value,                     \
                               R>                                                        \
    operator op_(U &&x, Vector<T, Abi> y)                                                \
    {                                                                                    \
        return Detail::operator op_(R(std::forward<U>(x)), R(y));                        \
    }                                                                                    \
    template <class T, class Abi, class U,                                               \
              class R = Detail::ReturnType<Vector<T, Abi>, U>>                           \
    Vc_ALWAYS_INLINE enable_if<Vc_TEST_FOR_BUILTIN_OPERATOR(op_) &&                      \
                                   std::is_convertible<Vector<T, Abi>, R>::value &&      \
                                   std::is_convertible<U, R>::value,                     \
                               Vector<T, Abi> &>                                         \
    operator op_##=(Vector<T, Abi> &x, U &&y)                                            \
    {                                                                                    \
        x = Detail::operator op_(R(x), R(std::forward<U>(y)));                           \
        return x;                                                                        \
    }

#define Vc_LOGICAL_OPERATOR(op_)                                                         \
    template <class T, class Abi>                                                        \
    Vc_ALWAYS_INLINE typename Vector<T, Abi>::Mask operator op_(Vector<T, Abi> x,        \
                                                                Vector<T, Abi> y)        \
    {                                                                                    \
        return !!x op_ !!y;                                                              \
    }                                                                                    \
    template <class T, class Abi, class U>                                               \
    Vc_ALWAYS_INLINE                                                                     \
        enable_if<std::is_convertible<Vector<T, Abi>, Vector<U, Abi>>::value &&          \
                      std::is_convertible<Vector<U, Abi>, Vector<T, Abi>>::value,        \
                  typename Detail::ReturnType<Vector<T, Abi>, Vector<U, Abi>>::Mask>     \
    operator op_(Vector<T, Abi> x, Vector<U, Abi> y)                                     \
    {                                                                                    \
        return !!x op_ !!y;                                                              \
    }                                                                                    \
    template <class T, class Abi, class U>                                               \
    Vc_ALWAYS_INLINE enable_if<std::is_same<bool, decltype(!std::declval<U>())>::value,  \
                               typename Vector<T, Abi>::Mask>                            \
    operator op_(Vector<T, Abi> x, U &&y)                                                \
    {                                                                                    \
        using M = typename Vector<T, Abi>::Mask;                                         \
        return !!x op_ M(!!std::forward<U>(y));                                          \
    }                                                                                    \
    template <class T, class Abi, class U>                                               \
    Vc_ALWAYS_INLINE enable_if<std::is_same<bool, decltype(!std::declval<U>())>::value,  \
                               typename Vector<T, Abi>::Mask>                            \
    operator op_(U &&x, Vector<T, Abi> y)                                                \
    {                                                                                    \
        using M = typename Vector<T, Abi>::Mask;                                         \
        return M(!!std::forward<U>(x)) op_ !!y;                                          \
    }

#define Vc_COMPARE_OPERATOR(op_)                                                         \
    template <class T, class Abi, class U,                                               \
              class R = Detail::ReturnType<Vector<T, Abi>, U>>                           \
    Vc_ALWAYS_INLINE enable_if<std::is_convertible<Vector<T, Abi>, R>::value &&          \
                                   std::is_convertible<U, R>::value,                     \
                               typename R::Mask>                                         \
    operator op_(Vector<T, Abi> x, U &&y)                                                \
    {                                                                                    \
        return Detail::operator op_(R(x), R(std::forward<U>(y)));                        \
    }                                                                                    \
    template <class T, class Abi, class U,                                               \
              class R = Detail::ReturnType<Vector<T, Abi>, U>>                           \
    Vc_ALWAYS_INLINE                                                                     \
        enable_if<!Traits::is_simd_vector_internal<remove_cvref_t<U>>::value &&          \
                      std::is_convertible<Vector<T, Abi>, R>::value &&                   \
                      std::is_convertible<U, R>::value,                                  \
                  typename R::Mask>                                                      \
    operator op_(U &&x, Vector<T, Abi> y)                                                \
    {                                                                                    \
        return Detail::operator op_(R(std::forward<U>(x)), R(y));                        \
    }

Vc_ALL_LOGICAL    (Vc_LOGICAL_OPERATOR);
Vc_ALL_BINARY     (Vc_GENERIC_OPERATOR);
Vc_ALL_ARITHMETICS(Vc_GENERIC_OPERATOR);
Vc_ALL_COMPARES   (Vc_COMPARE_OPERATOR);

#undef Vc_LOGICAL_OPERATOR
#undef Vc_GENERIC_OPERATOR
#undef Vc_COMPARE_OPERATOR
#undef Vc_INVALID_OPERATOR
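// What the macros generate, informally (a sketch): mixed-type overloads that
// convert both operands to the common ReturnType R before dispatching to the
// Detail implementation, e.g.
//     Vc::float_v x(1.f);
//     auto y = x + 2;        // R = float_v: the int is broadcast, result is float_v
//     auto m = 3.f < x;      // compare overload: result is float_v::Mask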

}  // namespace Vc
#endif  // COMMON_OPERATORS_H_
@ -0,0 +1,44 @@
/* This file is part of the Vc library. {{{
Copyright © 2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_COMMON_PERMUTATION_H_
#define VC_COMMON_PERMUTATION_H_

#include "macros.h"

namespace Vc_VERSIONED_NAMESPACE
{
namespace Permutation
{
struct ReversedTag {};
constexpr ReversedTag Reversed{};
}  // namespace Permutation
}  // namespace Vc

#endif  // VC_COMMON_PERMUTATION_H_

// vim: foldmethod=marker
@ -0,0 +1,270 @@
/* This file is part of the Vc library. {{{
Copyright © 2014-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_COMMON_SCATTERIMPLEMENTATION_H_
#define VC_COMMON_SCATTERIMPLEMENTATION_H_

#include "gatherimplementation.h"
#include "macros.h"

namespace Vc_VERSIONED_NAMESPACE
{
namespace Common
{

template <typename V, typename MT, typename IT>
Vc_ALWAYS_INLINE void executeScatter(SetIndexZeroT,
                                     V &v,
                                     MT *mem,
                                     IT indexes,
                                     typename V::MaskArgument mask)
{
    indexes.setZeroInverted(static_cast<typename IT::Mask>(mask));
    // Huh? This gathers from mem and blends into v instead of storing anything;
    // it reads like a leftover from the gather implementation.
    const V tmp(mem, indexes);
    where(mask) | v = tmp;
}

template <typename V, typename MT, typename IT>
Vc_ALWAYS_INLINE void executeScatter(SimpleLoopT,
                                     V &v,
                                     MT *mem,
                                     const IT &indexes,
                                     typename V::MaskArgument mask)
{
    if (Vc_IS_UNLIKELY(mask.isEmpty())) {
        return;
    }
    Common::unrolled_loop<std::size_t, 0, V::Size>([&](std::size_t i) {
        if (mask[i])
            mem[indexes[i]] = v[i];
    });
}

template <typename V, typename MT, typename IT>
Vc_ALWAYS_INLINE void executeScatter(BitScanLoopT,
                                     V &v,
                                     MT *mem,
                                     const IT &indexes,
                                     typename V::MaskArgument mask)
{
    size_t bits = mask.toInt();
    while (Vc_IS_LIKELY(bits > 0)) {
        size_t i, j;
        asm("bsf %[bits],%[i]\n\t"
            "bsr %[bits],%[j]\n\t"
            "btr %[i],%[bits]\n\t"
            "btr %[j],%[bits]\n\t"
            : [i] "=r"(i), [j] "=r"(j), [bits] "+r"(bits));
        mem[indexes[i]] = v[i];
        mem[indexes[j]] = v[j];
    }

    /* Alternative from Vc::SSE (0.7)
    int bits = mask.toInt();
    while (bits) {
        const int i = _bit_scan_forward(bits);
        bits ^= (1 << i); // btr?
        mem[indexes[i]] = v[i];
    }
    */
}
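// BitScanLoop retires two mask bits per pass: bsf finds the lowest set bit, bsr
// the highest, and the two btr instructions clear them before the next test.
// When a single bit remains, i == j and that element is simply stored twice.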

template <typename V, typename MT, typename IT>
Vc_ALWAYS_INLINE void executeScatter(PopcntSwitchT,
                                     V &v,
                                     MT *mem,
                                     const IT &indexes,
                                     typename V::MaskArgument mask,
                                     enable_if<V::Size == 16> = nullarg)
{
    unsigned int bits = mask.toInt();
    unsigned int low, high = 0;
    switch (Vc::Detail::popcnt16(bits)) {
    case 16:
        v.scatter(mem, indexes);
        break;
    case 15:
        low = _bit_scan_forward(bits);
        bits ^= 1 << low;
        mem[indexes[low]] = v[low];
    case 14:
        high = _bit_scan_reverse(bits);
        mem[indexes[high]] = v[high];
        high = (1 << high);
    case 13:
        low = _bit_scan_forward(bits);
        bits ^= high | (1 << low);
        mem[indexes[low]] = v[low];
    case 12:
        high = _bit_scan_reverse(bits);
        mem[indexes[high]] = v[high];
        high = (1 << high);
    case 11:
        low = _bit_scan_forward(bits);
        bits ^= high | (1 << low);
        mem[indexes[low]] = v[low];
    case 10:
        high = _bit_scan_reverse(bits);
        mem[indexes[high]] = v[high];
        high = (1 << high);
    case 9:
        low = _bit_scan_forward(bits);
        bits ^= high | (1 << low);
        mem[indexes[low]] = v[low];
    case 8:
        high = _bit_scan_reverse(bits);
        mem[indexes[high]] = v[high];
        high = (1 << high);
    case 7:
        low = _bit_scan_forward(bits);
        bits ^= high | (1 << low);
        mem[indexes[low]] = v[low];
    case 6:
        high = _bit_scan_reverse(bits);
        mem[indexes[high]] = v[high];
        high = (1 << high);
    case 5:
        low = _bit_scan_forward(bits);
        bits ^= high | (1 << low);
        mem[indexes[low]] = v[low];
    case 4:
        high = _bit_scan_reverse(bits);
        mem[indexes[high]] = v[high];
        high = (1 << high);
    case 3:
        low = _bit_scan_forward(bits);
        bits ^= high | (1 << low);
        mem[indexes[low]] = v[low];
    case 2:
        high = _bit_scan_reverse(bits);
        mem[indexes[high]] = v[high];
    case 1:
        low = _bit_scan_forward(bits);
        mem[indexes[low]] = v[low];
    case 0:
        break;
    }
}
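// Note for the switch above and the smaller variants below: the case labels fall
// through deliberately. Each case stores one element, alternating between the
// lowest (_bit_scan_forward) and highest (_bit_scan_reverse) remaining mask bit,
// until everything set in the mask has been written.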
|
||||
template <typename V, typename MT, typename IT>
|
||||
Vc_ALWAYS_INLINE void executeScatter(PopcntSwitchT,
|
||||
V &v,
|
||||
MT *mem,
|
||||
const IT &indexes,
|
||||
typename V::MaskArgument mask,
|
||||
enable_if<V::Size == 8> = nullarg)
|
||||
{
|
||||
unsigned int bits = mask.toInt();
|
||||
unsigned int low, high = 0;
|
||||
switch (Vc::Detail::popcnt8(bits)) {
|
||||
case 8:
|
||||
v.scatter(mem, indexes);
|
||||
break;
|
||||
case 7:
|
||||
low = _bit_scan_forward(bits);
|
||||
bits ^= 1 << low;
|
||||
mem[indexes[low]] = v[low];
|
||||
case 6:
|
||||
high = _bit_scan_reverse(bits);
|
||||
mem[indexes[high]] = v[high];
|
||||
high = (1 << high);
|
||||
case 5:
|
||||
low = _bit_scan_forward(bits);
|
||||
bits ^= high | (1 << low);
|
||||
mem[indexes[low]] = v[low];
|
||||
case 4:
|
||||
high = _bit_scan_reverse(bits);
|
||||
mem[indexes[high]] = v[high];
|
||||
high = (1 << high);
|
||||
case 3:
|
||||
low = _bit_scan_forward(bits);
|
||||
bits ^= high | (1 << low);
|
||||
mem[indexes[low]] = v[low];
|
||||
case 2:
|
||||
high = _bit_scan_reverse(bits);
|
||||
mem[indexes[high]] = v[high];
|
||||
case 1:
|
||||
low = _bit_scan_forward(bits);
|
||||
mem[indexes[low]] = v[low];
|
||||
case 0:
|
||||
break;
|
||||
}
|
||||
}
|
||||
template <typename V, typename MT, typename IT>
|
||||
Vc_ALWAYS_INLINE void executeScatter(PopcntSwitchT,
|
||||
V &v,
|
||||
MT *mem,
|
||||
const IT &indexes,
|
||||
typename V::MaskArgument mask,
|
||||
enable_if<V::Size == 4> = nullarg)
|
||||
{
|
||||
unsigned int bits = mask.toInt();
|
||||
unsigned int low, high = 0;
|
||||
switch (Vc::Detail::popcnt4(bits)) {
|
||||
case 4:
|
||||
v.scatter(mem, indexes);
|
||||
break;
|
||||
case 3:
|
||||
low = _bit_scan_forward(bits);
|
||||
bits ^= 1 << low;
|
||||
mem[indexes[low]] = v[low];
|
||||
case 2:
|
||||
high = _bit_scan_reverse(bits);
|
||||
mem[indexes[high]] = v[high];
|
||||
case 1:
|
||||
low = _bit_scan_forward(bits);
|
||||
mem[indexes[low]] = v[low];
|
||||
case 0:
|
||||
break;
|
||||
}
|
||||
}
|
||||
template <typename V, typename MT, typename IT>
|
||||
Vc_ALWAYS_INLINE void executeScatter(PopcntSwitchT,
|
||||
V &v,
|
||||
MT *mem,
|
||||
const IT &indexes,
|
||||
typename V::MaskArgument mask,
|
||||
enable_if<V::Size == 2> = nullarg)
|
||||
{
|
||||
unsigned int bits = mask.toInt();
|
||||
unsigned int low;
|
||||
switch (Vc::Detail::popcnt4(bits)) {
|
||||
case 2:
|
||||
v.scatter(mem, indexes);
|
||||
break;
|
||||
case 1:
|
||||
low = _bit_scan_forward(bits);
|
||||
mem[indexes[low]] = v[low];
|
||||
case 0:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Common
|
||||
} // namespace Vc
|
||||
|
||||
#endif // VC_COMMON_SCATTERIMPLEMENTATION_H_
|
|
@ -0,0 +1,136 @@
|
|||
/* This file is part of the Vc library. {{{
|
||||
Copyright © 2014-2015 Matthias Kretz <kretz@kde.org>
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the names of contributing organizations nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

///////////////////////////////////////////////////////////////////////////////////////////
// scatters
// A scatter takes the following arguments:
// 1. A pointer to memory of any type that EntryType can convert to.
// 2. An indexes “vector”. The requirement is that the type implements the subscript operator,
//    stores «Size» valid index values, and each offset to the pointer above yields a valid
//    memory location for reading.
// 3. Optionally the third argument may be a mask. The mask disables several memory stores and
//    thus removes the requirements in (2.) for the disabled entries.

private:
    /**\internal
     * This function implements a scatter given a pointer to memory \p mem and some
     * container object storing the scatter \p indexes.
     *
     * \param mem This pointer must be aligned correctly for the type \p MT. This is the
     * natural behavior of C++, so this is typically the case.
     * \param indexes This object contains at least \VSize{T} indexes that denote the
     * offset in \p mem where the components for the current vector should be copied to.
     * The offset is not in Bytes, but in multiples of `sizeof(MT)`.
     */
    // enable_if<std::can_convert<MT, EntryType>::value && has_subscript_operator<IT>::value>
    template <typename MT, typename IT>
    inline void scatterImplementation(MT *mem, IT &&indexes) const;

    /**\internal
     * This overload of the above function adds a \p mask argument to disable memory
     * accesses at the \p indexes offsets where \p mask is \c false.
     */
    template <typename MT, typename IT>
    inline void scatterImplementation(MT *mem, IT &&indexes, MaskArgument mask) const;

public:
#define Vc_ASSERT_SCATTER_PARAMETER_TYPES_                                               \
    static_assert(                                                                       \
        std::is_convertible<EntryType, MT>::value,                                       \
        "The memory pointer needs to point to a type that the EntryType of this "        \
        "SIMD vector type can be converted to.");                                        \
    static_assert(                                                                       \
        Vc::Traits::has_subscript_operator<IT>::value,                                   \
        "The indexes argument must be a type that implements the subscript operator."); \
    static_assert(                                                                       \
        !Traits::is_simd_vector<IT>::value ||                                            \
            Traits::simd_vector_size<IT>::value >= Size,                                 \
        "If you use a SIMD vector for the indexes parameter, the index vector must "     \
        "have at least as many entries as this SIMD vector.");                           \
    static_assert(                                                                       \
        !std::is_array<T>::value ||                                                      \
            (std::rank<T>::value == 1 &&                                                 \
             (std::extent<T>::value == 0 || std::extent<T>::value >= Size)),             \
        "If you use a simple array for the indexes parameter, the array must have "      \
        "at least as many entries as this SIMD vector.")

    /**
     * \name Scatter functions
     *
     * Stores a vector to the objects at `mem[indexes[0]]`, `mem[indexes[1]]`,
     * `mem[indexes[2]]`, ...
     *
     * \param mem A pointer to memory which contains objects of type \p MT at the offsets
     *            given by \p indexes.
     * \param indexes An object implementing the subscript operator that holds at least
     *                \VSize{T} valid offsets into \p mem (in multiples of `sizeof(MT)`).
     * \param mask An optional mask; stores for entries where the mask is \c false are
     *             disabled.
     */
    ///@{

    /// Scatter function
    template <typename MT,
              typename IT,
              typename = enable_if<Vc::Traits::has_subscript_operator<IT>::value>>
    Vc_INTRINSIC void scatter(MT *mem, IT &&indexes) const
    {
        Vc_ASSERT_SCATTER_PARAMETER_TYPES_;
        scatterImplementation(mem, std::forward<IT>(indexes));
    }

    /// Masked scatter function
    template <typename MT,
              typename IT,
              typename = enable_if<Vc::Traits::has_subscript_operator<IT>::value>>
    Vc_INTRINSIC void scatter(MT *mem, IT &&indexes, MaskArgument mask) const
    {
        Vc_ASSERT_SCATTER_PARAMETER_TYPES_;
        scatterImplementation(mem, std::forward<IT>(indexes), mask);
    }
    ///@}
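
    // A minimal usage sketch (an illustration added here, not part of the original
    // interface; `data` and `idx` are hypothetical names):
    //
    //   float data[100] = {};
    //   Vc::float_v v(1.5f);                        // broadcast
    //   Vc::float_v::IndexType idx(Vc::IndexesFromZero);
    //   v.scatter(&data[0], idx);                   // data[idx[i]] = v[i] for all i
    //   v.scatter(&data[0], idx, v > 1.f);          // only where the mask is true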

#include "scatterinterface_deprecated.h"

    /**\internal
     * \name Scatter function to use from Vc::Common::subscript_operator
     *
     * \param args The pointer and indexes arguments bundled by the subscript operator.
     * \param mask An optional mask that disables the stores for inactive entries.
     */
    ///@{
    template <typename MT, typename IT>
    Vc_INTRINSIC void scatter(const Common::ScatterArguments<MT, IT> &args) const
    {
        scatter(args.address, args.indexes);
    }

    template <typename MT, typename IT>
    Vc_INTRINSIC void scatter(const Common::ScatterArguments<MT, IT> &args, MaskArgument mask) const
    {
        scatter(args.address, args.indexes, mask);
    }
    ///@}
#undef Vc_ASSERT_SCATTER_PARAMETER_TYPES_

@ -0,0 +1,147 @@

/// \name Deprecated Members
///@{

/**
 * \deprecated Use Vc::array or Vc::vector subscripting instead.
 *
 * \param array A pointer into memory (without alignment restrictions).
 * \param member1 If \p array points to a struct, \p member1 determines the member in the struct to
 *                be read. Thus the offsets in \p indexes are relative to the \p array and not to
 *                the size of the gathered type (i.e. array[i].*member1 is accessed instead of
 *                (&(array->*member1))[i])
 * \param indexes Determines the offsets into \p array where the values are gathered from/scattered
 *                to. The type of indexes can either be an integer vector or a type that supports
 *                operator[] access.
 */
template <typename S1, typename IT>
Vc_DEPRECATED("use the subscript operator to Vc::array or Vc::vector "
              "instead.") inline void scatter(S1 *array, EntryType S1::*member1,
                                              IT indexes) const
{
    scatter(Common::SubscriptOperation<S1, IT, std::ratio<1, 1>, true>(
                array, indexes)[member1]
                .scatterArguments());
}

/**
 * \deprecated Use Vc::array or Vc::vector subscripting instead.
 *
 * \param array A pointer into memory (without alignment restrictions).
 * \param member1 If \p array points to a struct, \p member1 determines the member in the struct to
 *                be read. Thus the offsets in \p indexes are relative to the \p array and not to
 *                the size of the gathered type (i.e. array[i].*member1 is accessed instead of
 *                (&(array->*member1))[i])
 * \param indexes Determines the offsets into \p array where the values are gathered from/scattered
 *                to. The type of indexes can either be an integer vector or a type that supports
 *                operator[] access.
 * \param mask If a mask is given only the active entries will be gathered/scattered.
 */
template <typename S1, typename IT>
Vc_DEPRECATED("use the subscript operator to Vc::array or Vc::vector "
              "instead.") inline void scatter(S1 *array, EntryType S1::*member1,
                                              IT indexes, MaskArgument mask) const
{
    scatter(Common::SubscriptOperation<S1, IT, std::ratio<1, 1>, true>(
                array, indexes)[member1]
                .scatterArguments(),
            mask);
}

/**
 * \deprecated Use Vc::array or Vc::vector subscripting instead.
 *
 * \param array A pointer into memory (without alignment restrictions).
 * \param member1 If \p array points to a struct, \p member1 determines the member in the struct to
 *                be read. Thus the offsets in \p indexes are relative to the \p array and not to
 *                the size of the gathered type (i.e. array[i].*member1 is accessed instead of
 *                (&(array->*member1))[i])
 * \param member2 If \p member1 is a struct then \p member2 selects the member to be read from that
 *                struct (i.e. array[i].*member1.*member2 is read).
 * \param indexes Determines the offsets into \p array where the values are gathered from/scattered
 *                to. The type of indexes can either be an integer vector or a type that supports
 *                operator[] access.
 */
template <typename S1, typename S2, typename IT>
Vc_DEPRECATED("use the subscript operator to Vc::array or Vc::vector "
              "instead.") inline void scatter(S1 *array, S2 S1::*member1,
                                              EntryType S2::*member2,
                                              IT indexes) const
{
    scatter(Common::SubscriptOperation<S1, IT, std::ratio<1, 1>, true>(
                array, indexes)[member1][member2]
                .scatterArguments());
}

/**
 * \deprecated Use Vc::array or Vc::vector subscripting instead.
 *
 * \param array A pointer into memory (without alignment restrictions).
 * \param member1 If \p array points to a struct, \p member1 determines the member in the struct to
 *                be read. Thus the offsets in \p indexes are relative to the \p array and not to
 *                the size of the gathered type (i.e. array[i].*member1 is accessed instead of
 *                (&(array->*member1))[i])
 * \param member2 If \p member1 is a struct then \p member2 selects the member to be read from that
 *                struct (i.e. array[i].*member1.*member2 is read).
 * \param indexes Determines the offsets into \p array where the values are gathered from/scattered
 *                to. The type of indexes can either be an integer vector or a type that supports
 *                operator[] access.
 * \param mask If a mask is given only the active entries will be gathered/scattered.
 */
template <typename S1, typename S2, typename IT>
Vc_DEPRECATED("use the subscript operator to Vc::array or Vc::vector "
              "instead.") inline void scatter(S1 *array, S2 S1::*member1,
                                              EntryType S2::*member2, IT indexes,
                                              MaskArgument mask) const
{
    scatter(Common::SubscriptOperation<S1, IT, std::ratio<1, 1>, true>(
                array, indexes)[member1][member2]
                .scatterArguments(),
            mask);
}

/**
 * \deprecated Use Vc::array or Vc::vector subscripting instead.
 *
 * \param array A pointer into memory (without alignment restrictions).
 * \param ptrMember1 If \p array points to a struct, \p ptrMember1 determines the member in the
 *                   struct to be read. Thus the offsets in \p outerIndexes are relative to the
 *                   \p array and not to the size of the gathered type (i.e.
 *                   array[i].*ptrMember1 is accessed instead of (&(array->*ptrMember1))[i])
 * \param outerIndexes Determines the offsets into \p array selecting the structs.
 * \param innerIndexes Determines the offsets into the arrays pointed to by \p ptrMember1.
 */
template <typename S1, typename IT1, typename IT2>
Vc_DEPRECATED("use the subscript operator to Vc::array or Vc::vector "
              "instead.") inline void scatter(S1 *array, EntryType *S1::*ptrMember1,
                                              IT1 outerIndexes,
                                              IT2 innerIndexes) const
{
    scatter(Common::SubscriptOperation<S1, IT1, std::ratio<1, 1>, true>(
                array, outerIndexes)[ptrMember1][innerIndexes]
                .scatterArguments());
}

/**
 * \deprecated Use Vc::array or Vc::vector subscripting instead.
 *
 * \param array A pointer into memory (without alignment restrictions).
 * \param ptrMember1 If \p array points to a struct, \p ptrMember1 determines the member in the
 *                   struct to be read. Thus the offsets in \p outerIndexes are relative to the
 *                   \p array and not to the size of the gathered type (i.e.
 *                   array[i].*ptrMember1 is accessed instead of (&(array->*ptrMember1))[i])
 * \param outerIndexes Determines the offsets into \p array selecting the structs.
 * \param innerIndexes Determines the offsets into the arrays pointed to by \p ptrMember1.
 * \param mask If a mask is given only the active entries will be gathered/scattered.
 */
template <typename S1, typename IT1, typename IT2>
Vc_DEPRECATED("use the subscript operator to Vc::array or Vc::vector "
              "instead.") inline void scatter(S1 *array, EntryType *S1::*ptrMember1,
                                              IT1 outerIndexes, IT2 innerIndexes,
                                              MaskArgument mask) const
{
    scatter(Common::SubscriptOperation<S1, IT1, std::ratio<1, 1>, true>(
                array, outerIndexes)[ptrMember1][innerIndexes]
                .scatterArguments(),
            mask);
}
///@}

@ -0,0 +1,92 @@

/* This file is part of the Vc library. {{{
Copyright © 2013-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_COMMON_SET_H_
#define VC_COMMON_SET_H_

#include "macros.h"
namespace Vc_VERSIONED_NAMESPACE
{
namespace
{
static Vc_INTRINSIC Vc_CONST __m128i set(unsigned short x0, unsigned short x1, unsigned short x2, unsigned short x3,
                                         unsigned short x4, unsigned short x5, unsigned short x6, unsigned short x7)
{
#if defined(Vc_GNU_ASM)
#if 0 // defined(__x86_64__)
    // it appears that the 32bit variant is always faster
    __m128i r;
    unsigned long long tmp0 = x3; tmp0 = (tmp0 << 16) | x2;
    unsigned long long tmp1 = x1; tmp1 = (tmp1 << 16) | x0;
    asm("vmovq %1,%0" : "=x"(r) : "r"((tmp0 << 32) | tmp1));
    unsigned long long tmp2 = x7; tmp2 = (tmp2 << 16) | x6;
    unsigned long long tmp3 = x5; tmp3 = (tmp3 << 16) | x4;
    asm("vpinsrq $1,%1,%0,%0" : "+x"(r) : "r"((tmp2 << 32) | tmp3));
    return r;
#elif defined(Vc_USE_VEX_CODING)
    __m128i r0, r1;
    unsigned int tmp0 = x1; tmp0 = (tmp0 << 16) | x0;
    unsigned int tmp1 = x3; tmp1 = (tmp1 << 16) | x2;
    unsigned int tmp2 = x5; tmp2 = (tmp2 << 16) | x4;
    unsigned int tmp3 = x7; tmp3 = (tmp3 << 16) | x6;
    asm("vmovd %1,%0" : "=x"(r0) : "r"(tmp0));
    asm("vpinsrd $1,%1,%0,%0" : "+x"(r0) : "r"(tmp1));
    asm("vmovd %1,%0" : "=x"(r1) : "r"(tmp2));
    asm("vpinsrd $1,%1,%0,%0" : "+x"(r1) : "r"(tmp3));
    asm("vpunpcklqdq %1,%0,%0" : "+x"(r0) : "x"(r1));
    return r0;
#else
    __m128i r0, r1;
    unsigned int tmp0 = x1; tmp0 = (tmp0 << 16) | x0;
    unsigned int tmp1 = x3; tmp1 = (tmp1 << 16) | x2;
    unsigned int tmp2 = x5; tmp2 = (tmp2 << 16) | x4;
    unsigned int tmp3 = x7; tmp3 = (tmp3 << 16) | x6;
    asm("movd %1,%0" : "=x"(r0) : "r"(tmp0));
    asm("pinsrd $1,%1,%0" : "+x"(r0) : "r"(tmp1));
    asm("movd %1,%0" : "=x"(r1) : "r"(tmp2));
    asm("pinsrd $1,%1,%0" : "+x"(r1) : "r"(tmp3));
    asm("punpcklqdq %1,%0" : "+x"(r0) : "x"(r1));
    return r0;
#endif
#else
    unsigned int tmp0 = x1; tmp0 = (tmp0 << 16) | x0;
    unsigned int tmp1 = x3; tmp1 = (tmp1 << 16) | x2;
    unsigned int tmp2 = x5; tmp2 = (tmp2 << 16) | x4;
    unsigned int tmp3 = x7; tmp3 = (tmp3 << 16) | x6;
    return _mm_setr_epi32(tmp0, tmp1, tmp2, tmp3);
#endif
}
static Vc_INTRINSIC Vc_CONST __m128i set(short x0, short x1, short x2, short x3, short x4, short x5, short x6, short x7)
{
    return set(static_cast<unsigned short>(x0), static_cast<unsigned short>(x1), static_cast<unsigned short>(x2),
               static_cast<unsigned short>(x3), static_cast<unsigned short>(x4), static_cast<unsigned short>(x5),
               static_cast<unsigned short>(x6), static_cast<unsigned short>(x7));
}
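
// Illustrative check (a sketch based on the fallback branch above, not part of the
// original file): packing two 16-bit values per 32-bit lane makes the result agree
// with the plain intrinsic, i.e.
//   set(1, 2, 3, 4, 5, 6, 7, 8)
// yields the same bits as
//   _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);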
} // anonymous namespace
} // namespace Vc

#endif // VC_COMMON_SET_H_

@ -0,0 +1,68 @@

/* This file is part of the Vc library. {{{
Copyright © 2014-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_COMMON_SIMD_CAST_H_
#define VC_COMMON_SIMD_CAST_H_

#include <type_traits>
#include "macros.h"

// declare a bogus simd_cast function template in the global namespace to enable ADL for
// simd_cast<T>
template <class> void simd_cast();

namespace Vc_VERSIONED_NAMESPACE
{
/**
 * Casts the argument \p x from type \p From to type \p To.
 *
 * This function implements the trivial case where \p To and \p From are the same type.
 *
 * \param x The object of type \p From to be converted to type \p To.
 * \returns An object of type \p To with all vector components converted according to
 *          standard conversion behavior as mandated by the C++ standard for the
 *          underlying arithmetic types.
 */
template <typename To, typename From>
Vc_INTRINSIC Vc_CONST To
simd_cast(From &&x, enable_if<std::is_same<To, Traits::decay<From>>::value> = nullarg)
{
    return std::forward<From>(x);
}

/**
 * A cast from nothing results in default-initialization of \p To.
 *
 * This function can be useful in generic code where a parameter pack expands to nothing.
 *
 * \returns A zero-initialized object of type \p To.
 */
template <typename To> Vc_INTRINSIC Vc_CONST To simd_cast() { return To(); }
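
// A minimal usage sketch (hypothetical values; the actual vector/mask conversions are
// provided by overloads elsewhere in the library, only the identity and nullary cases
// live in this header):
//   Vc::float_v x(1.f);
//   auto same = Vc::simd_cast<Vc::float_v>(x);  // the identity overload defined above
//   auto none = Vc::simd_cast<Vc::float_v>();   // the nullary overload: zero-initialized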

} // namespace Vc

#endif // VC_COMMON_SIMD_CAST_H_

@ -0,0 +1,79 @@

/* This file is part of the Vc library. {{{
Copyright © 2014-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_COMMON_SIMD_CAST_CALLER_TCC_
#define VC_COMMON_SIMD_CAST_CALLER_TCC_

#include "macros.h"
namespace Vc_VERSIONED_NAMESPACE {
template <class T, std::size_t N, class VectorType>
template <class U, class V, class>
Vc_INTRINSIC SimdMaskArray<T, N, VectorType, N>::SimdMaskArray(
    const SimdMaskArray<U, N, V> &x)
    : data(simd_cast<mask_type>(internal_data(x)))
{
}
template <class T, std::size_t N, class VectorType>
template <class U, class V, class, class>
Vc_INTRINSIC SimdMaskArray<T, N, VectorType, N>::SimdMaskArray(
    const SimdMaskArray<U, N, V> &x)
    : data(simd_cast<mask_type>(internal_data(internal_data0(x)),
                                internal_data(internal_data1(x))))
{
}
template <class T, std::size_t N, class VectorType>
template <class U, class V, class, class, class>
Vc_INTRINSIC SimdMaskArray<T, N, VectorType, N>::SimdMaskArray(
    const SimdMaskArray<U, N, V> &x)
    : data(simd_cast<mask_type>(internal_data(internal_data0(internal_data0(x))),
                                internal_data(internal_data1(internal_data0(x))),
                                internal_data(internal_data0(internal_data1(x))),
                                internal_data(internal_data1(internal_data1(x)))))
{
}
// conversion from any Segment object (could be SimdMaskArray or Mask<T>)
template <class T, std::size_t N, class VectorType>
template <class M, std::size_t Pieces, std::size_t Index>
Vc_INTRINSIC SimdMaskArray<T, N, VectorType, N>::SimdMaskArray(
    Common::Segment<M, Pieces, Index> &&x,
    enable_if<Traits::simd_vector_size<M>::value == Size * Pieces>)
    : data(simd_cast<mask_type, Index>(x.data))
{
}
// conversion from Mask<T>
template <class T, std::size_t N, class VectorType>
template <class M, class>
Vc_INTRINSIC SimdMaskArray<T, N, VectorType, N>::SimdMaskArray(M k)
    : data(simd_cast<mask_type>(k))
{
}
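
// A minimal usage sketch (an assumption for illustration; `native` is a hypothetical
// name and the mask width is whatever the build target provides):
//   Vc::float_m native = ...;                                // some native Mask<T>
//   Vc::SimdMaskArray<float, Vc::float_m::Size> m(native);   // uses the ctor above,
//                                                            // forwarding via simd_cast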

} // namespace Vc_VERSIONED_NAMESPACE

#endif // VC_COMMON_SIMD_CAST_CALLER_TCC_

// vim: foldmethod=marker

File diff suppressed because it is too large

@ -0,0 +1,210 @@

/* This file is part of the Vc library. {{{
Copyright © 2014-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_COMMON_SIMDARRAYFWD_H_
#define VC_COMMON_SIMDARRAYFWD_H_

#include "../scalar/types.h"
#include "../sse/types.h"
#include "../avx/types.h"

#include "utility.h"
#include "macros.h"

namespace Vc_VERSIONED_NAMESPACE
{
// specialization of Vector for fixed_size<N> {{{
template <class T, int N>
class Vector<T, simd_abi::fixed_size<N>> : public SimdArray<T, N>
{
    using SimdArray<T, N>::SimdArray;

public:
    // overload copy to force argument passing via the stack. This makes the type more
    // usable on ABI boundaries
    Vc_INTRINSIC Vector(const Vector &x) : SimdArray<T, N>(x) {}
    Vc_INTRINSIC Vector &operator=(const Vector &x)
    {
        SimdArray<T, N>::operator=(x);
        return *this;
    }
    Vector() = default;

    using abi_type = simd_abi::fixed_size<N>;
    using abi = abi_type;

    Vc_DEPRECATED("use Vector([](int n) { return n; }) instead of "
                  "Vector::IndexesFromZero()") static Vector IndexesFromZero()
    {
        return Vector([](size_t i) -> T { return i; });
    }
    Vc_DEPRECATED("use 0 instead of Vector::Zero()") static Vector Zero() { return 0; }
    Vc_DEPRECATED("use 1 instead of Vector::One()") static Vector One() { return 1; }
};
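
// A minimal usage sketch (hypothetical width 8, not part of the original file): with
// this specialization the fixed-size ABI names the SimdArray implementation, so
//   Vc::Vector<float, Vc::simd_abi::fixed_size<8>> v([](int n) { return n; });
// behaves like Vc::SimdArray<float, 8>, while the user-provided copy operations above
// force copies across ABI boundaries to be passed via the stack.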

template <class T, int N>
class Mask<T, simd_abi::fixed_size<N>> : public SimdMaskArray<T, N>
{
    using SimdMaskArray<T, N>::SimdMaskArray;

public:
    // overload copy to force argument passing via the stack. This makes the type more
    // usable on ABI boundaries
    Vc_INTRINSIC Mask(const Mask &x) : SimdMaskArray<T, N>(x) {}
    Vc_INTRINSIC Mask &operator=(const Mask &x)
    {
        SimdMaskArray<T, N>::operator=(x);
        return *this;
    }
    Mask() = default;

    using abi_type = simd_abi::fixed_size<N>;
    using abi = abi_type;
};
// }}}

/** \internal
 * Simple traits for SimdArray to easily access internal types of non-atomic SimdArray
 * types.
 */
template <typename T, std::size_t N> struct SimdArrayTraits {
    static constexpr std::size_t N0 = Common::left_size<N>();
    static constexpr std::size_t N1 = Common::right_size<N>();

    using storage_type0 = fixed_size_simd<T, N0>;
    using storage_type1 = fixed_size_simd<T, N1>;
};

template <typename T, std::size_t N, typename VectorType, std::size_t VectorSize>
Vc_INTRINSIC_L typename SimdArrayTraits<T, N>::storage_type0 &internal_data0(
    SimdArray<T, N, VectorType, VectorSize> &x) Vc_INTRINSIC_R;
template <typename T, std::size_t N, typename VectorType, std::size_t VectorSize>
Vc_INTRINSIC_L typename SimdArrayTraits<T, N>::storage_type1 &internal_data1(
    SimdArray<T, N, VectorType, VectorSize> &x) Vc_INTRINSIC_R;
template <typename T, std::size_t N, typename VectorType, std::size_t VectorSize>
Vc_INTRINSIC_L const typename SimdArrayTraits<T, N>::storage_type0 &internal_data0(
    const SimdArray<T, N, VectorType, VectorSize> &x) Vc_INTRINSIC_R;
template <typename T, std::size_t N, typename VectorType, std::size_t VectorSize>
Vc_INTRINSIC_L const typename SimdArrayTraits<T, N>::storage_type1 &internal_data1(
    const SimdArray<T, N, VectorType, VectorSize> &x) Vc_INTRINSIC_R;

template <typename T, std::size_t N, typename V>
Vc_INTRINSIC_L V &internal_data(SimdArray<T, N, V, N> &x) Vc_INTRINSIC_R;
template <typename T, std::size_t N, typename V>
Vc_INTRINSIC_L const V &internal_data(const SimdArray<T, N, V, N> &x) Vc_INTRINSIC_R;

namespace Traits
{
// is_fixed_size_simd {{{1
template <class T> struct is_fixed_size_simd : std::false_type {
};
template <class T, int N>
struct is_fixed_size_simd<fixed_size_simd<T, N>> : std::true_type {
};
template <class T, int N>
struct is_fixed_size_simd<fixed_size_simd_mask<T, N>> : std::true_type {
};

// is_simd_vector_internal {{{1
template <class T, int N>
struct is_simd_vector_internal<fixed_size_simd<T, N>> : is_valid_vector_argument<T> {};

// is_simd_mask_internal {{{1
template <class T, int N>
struct is_simd_mask_internal<fixed_size_simd_mask<T, N>> : is_valid_vector_argument<T> {};

// is_atomic_simdarray_internal {{{1
template <typename T, std::size_t N, typename V>
struct is_atomic_simdarray_internal<SimdArray<T, N, V, N>> : is_valid_vector_argument<T> {};
template <typename T, int N>
struct is_atomic_simdarray_internal<fixed_size_simd<T, N>>
    : is_atomic_simdarray_internal<SimdArray<T, N>> {
};

// is_atomic_simd_mask_array_internal {{{1
template <typename T, std::size_t N, typename V>
struct is_atomic_simd_mask_array_internal<SimdMaskArray<T, N, V, N>>
    : is_valid_vector_argument<T> {
};
template <typename T, int N>
struct is_atomic_simd_mask_array_internal<fixed_size_simd_mask<T, N>>
    : is_atomic_simd_mask_array_internal<SimdMaskArray<T, N>> {
};

// is_simdarray_internal {{{1
template <typename T, std::size_t N, typename VectorType, std::size_t M>
struct is_simdarray_internal<SimdArray<T, N, VectorType, M>>
    : is_valid_vector_argument<T> {
};
template <typename T, int N>
struct is_simdarray_internal<fixed_size_simd<T, N>> : is_valid_vector_argument<T> {
};

// is_simd_mask_array_internal {{{1
template <typename T, std::size_t N, typename VectorType, std::size_t M>
struct is_simd_mask_array_internal<SimdMaskArray<T, N, VectorType, M>>
    : is_valid_vector_argument<T> {
};
template <typename T, int N>
struct is_simd_mask_array_internal<fixed_size_simd_mask<T, N>>
    : is_valid_vector_argument<T> {
};

// is_integral_internal {{{1
template <typename T, std::size_t N, typename V, std::size_t M>
struct is_integral_internal<SimdArray<T, N, V, M>, false> : std::is_integral<T> {
};

// is_floating_point_internal {{{1
template <typename T, std::size_t N, typename V, std::size_t M>
struct is_floating_point_internal<SimdArray<T, N, V, M>, false>
    : std::is_floating_point<T> {
};

// is_signed_internal {{{1
template <typename T, std::size_t N, typename V, std::size_t M>
struct is_signed_internal<SimdArray<T, N, V, M>, false> : std::is_signed<T> {
};

// is_unsigned_internal {{{1
template <typename T, std::size_t N, typename V, std::size_t M>
struct is_unsigned_internal<SimdArray<T, N, V, M>, false> : std::is_unsigned<T> {
};

// has_no_allocated_data_impl {{{1
template <typename T, std::size_t N>
struct has_no_allocated_data_impl<Vc::SimdArray<T, N>> : std::true_type {
};

// }}}1
} // namespace Traits

} // namespace Vc

#endif // VC_COMMON_SIMDARRAYFWD_H_

// vim: foldmethod=marker

@ -0,0 +1,593 @@

/* This file is part of the Vc library. {{{
Copyright © 2013-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_COMMON_SIMDARRAYHELPER_H_
#define VC_COMMON_SIMDARRAYHELPER_H_

#include "macros.h"

namespace Vc_VERSIONED_NAMESPACE
{
// private_init {{{
namespace
{
static constexpr struct private_init_t {} private_init = {};
} // unnamed namespace
// }}}

namespace Common
{

/// \addtogroup SimdArray
/// @{

namespace Operations/*{{{*/
{
struct tag {};
#define Vc_DEFINE_OPERATION(name_)                                                       \
    struct name_ : public tag {                                                          \
        template <typename V, typename... Args>                                          \
        Vc_INTRINSIC void operator()(V &v, Args &&... args)                              \
        {                                                                                \
            v.name_(std::forward<Args>(args)...);                                        \
        }                                                                                \
    }
Vc_DEFINE_OPERATION(gather);
Vc_DEFINE_OPERATION(scatter);
Vc_DEFINE_OPERATION(load);
Vc_DEFINE_OPERATION(store);
Vc_DEFINE_OPERATION(setZero);
Vc_DEFINE_OPERATION(setZeroInverted);
Vc_DEFINE_OPERATION(assign);
#undef Vc_DEFINE_OPERATION
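
// For illustration (a sketch of what the macro above generates; this comment is not
// part of the original file): Vc_DEFINE_OPERATION(load) expands to roughly
//   struct load : public tag {
//       template <typename V, typename... Args>
//       Vc_INTRINSIC void operator()(V &v, Args &&... args)
//       { v.load(std::forward<Args>(args)...); }
//   };
// i.e. each tag type forwards its arguments to the member function of the same name.
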
#define Vc_DEFINE_OPERATION(name_, code_)                                                \
    struct name_ : public tag {                                                          \
        template <typename V> Vc_INTRINSIC void operator()(V &v) { code_; }             \
    }
Vc_DEFINE_OPERATION(increment, ++(v));
Vc_DEFINE_OPERATION(decrement, --(v));
Vc_DEFINE_OPERATION(random, v = V::Random());
#undef Vc_DEFINE_OPERATION
#define Vc_DEFINE_OPERATION_FORWARD(name_)                                               \
    struct Forward_##name_ : public tag                                                  \
    {                                                                                    \
        template <typename... Args, typename = decltype(name_(std::declval<Args>()...))> \
        Vc_INTRINSIC void operator()(decltype(name_(std::declval<Args>()...)) &v,        \
                                     Args &&... args)                                    \
        {                                                                                \
            v = name_(std::forward<Args>(args)...);                                      \
        }                                                                                \
        template <typename... Args, typename = decltype(name_(std::declval<Args>()...))> \
        Vc_INTRINSIC void operator()(std::nullptr_t, Args && ... args)                   \
        {                                                                                \
            name_(std::forward<Args>(args)...);                                          \
        }                                                                                \
    }
Vc_DEFINE_OPERATION_FORWARD(abs);
Vc_DEFINE_OPERATION_FORWARD(asin);
Vc_DEFINE_OPERATION_FORWARD(atan);
Vc_DEFINE_OPERATION_FORWARD(atan2);
Vc_DEFINE_OPERATION_FORWARD(cos);
Vc_DEFINE_OPERATION_FORWARD(ceil);
Vc_DEFINE_OPERATION_FORWARD(copysign);
Vc_DEFINE_OPERATION_FORWARD(exp);
Vc_DEFINE_OPERATION_FORWARD(exponent);
Vc_DEFINE_OPERATION_FORWARD(fma);
Vc_DEFINE_OPERATION_FORWARD(floor);
Vc_DEFINE_OPERATION_FORWARD(frexp);
Vc_DEFINE_OPERATION_FORWARD(isfinite);
Vc_DEFINE_OPERATION_FORWARD(isinf);
Vc_DEFINE_OPERATION_FORWARD(isnan);
Vc_DEFINE_OPERATION_FORWARD(isnegative);
Vc_DEFINE_OPERATION_FORWARD(ldexp);
Vc_DEFINE_OPERATION_FORWARD(log);
Vc_DEFINE_OPERATION_FORWARD(log10);
Vc_DEFINE_OPERATION_FORWARD(log2);
Vc_DEFINE_OPERATION_FORWARD(reciprocal);
Vc_DEFINE_OPERATION_FORWARD(round);
Vc_DEFINE_OPERATION_FORWARD(rsqrt);
Vc_DEFINE_OPERATION_FORWARD(sin);
Vc_DEFINE_OPERATION_FORWARD(sincos);
Vc_DEFINE_OPERATION_FORWARD(sqrt);
Vc_DEFINE_OPERATION_FORWARD(trunc);
Vc_DEFINE_OPERATION_FORWARD(min);
Vc_DEFINE_OPERATION_FORWARD(max);
#undef Vc_DEFINE_OPERATION_FORWARD
template<typename T> using is_operation = std::is_base_of<tag, T>;
} // namespace Operations }}}

/**
 * \internal
 * Helper type to statically communicate segmentation of one vector register into 2^n parts
 * (Pieces).
 *
 * Forward declaration in common/types.h.
 */
template <typename T_, std::size_t Pieces_, std::size_t Index_> struct Segment/*{{{*/
{
    static_assert(Index_ < Pieces_, "You found a bug in Vc. Please report.");

    using type = T_;
    using type_decayed = typename std::decay<type>::type;
    static constexpr std::size_t Pieces = Pieces_;
    static constexpr std::size_t Index = Index_;
    using fixed_size_type =
        fixed_size_simd<conditional_t<Traits::is_simd_vector<type_decayed>::value,
                                      typename type_decayed::EntryType, float>,
                        type_decayed::Size / Pieces>;

    type data;

    static constexpr std::size_t EntryOffset = Index * type_decayed::Size / Pieces;

    // no non-const operator[] needed
    decltype(std::declval<const type &>()[0]) operator[](size_t i) const { return data[i + EntryOffset]; }

    fixed_size_type to_fixed_size() const
    {
        return simd_cast<fixed_size_type, Index>(data);
    }
};/*}}}*/
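
// A sketch of the intent (hypothetical 8-entry vector v; this comment is not part of
// the original file): Segment<decltype(v), 2, 1>{v} is a read-only view of the upper
// half of v:
//   seg[0] == v[4]          // operator[] adds EntryOffset = 1 * 8 / 2 = 4
//   seg.to_fixed_size()     // the same half converted to a fixed_size_simd<T, 4>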

// Segment<T *, ...> specialization {{{
template <typename T_, std::size_t Pieces_, std::size_t Index_>
struct Segment<T_ *, Pieces_, Index_> {
    static_assert(Index_ < Pieces_, "You found a bug in Vc. Please report.");

    using type = T_ *;
    using type_decayed = typename std::decay<T_>::type;
    static constexpr size_t Pieces = Pieces_;
    static constexpr size_t Index = Index_;
    using fixed_size_type = fixed_size_simd<
        typename std::conditional<Traits::is_simd_vector<type_decayed>::value,
                                  typename type_decayed::VectorEntryType, float>::type,
        type_decayed::Size / Pieces> *;

    type data;

    static constexpr std::size_t EntryOffset = Index * type_decayed::size() / Pieces;

    fixed_size_type to_fixed_size() const
    {
        return reinterpret_cast<
#ifdef Vc_GCC
            // GCC might ICE if this type is declared with may_alias. If it doesn't
            // ICE it warns about ignoring the attribute.
            typename std::remove_pointer<fixed_size_type>::type
#else
            MayAlias<typename std::remove_pointer<fixed_size_type>::type>
#endif
                *>(data) +
            Index;
    }

    //decltype(std::declval<type>()[0]) operator[](size_t i) { return data[i + EntryOffset]; }
    //decltype(std::declval<type>()[0]) operator[](size_t i) const { return data[i + EntryOffset]; }
};/*}}}*/

/** \internal
    Template class that is used to attach an offset value to an existing type. It is used
    for IndexesFromZero construction in SimdArray. The \c data1 constructor needs to know
    that the IndexesFromZero constructor requires an offset so that the whole data is
    constructed as a correct sequence from `0` to `Size - 1`.

    \tparam T The original type that needs the offset attached.
    \tparam Offset An integral value that determines the offset in the complete SimdArray.
 */
template <typename T, std::size_t Offset> struct AddOffset
{
    constexpr AddOffset() = default;
};

// class Split {{{1
/** \internal
    Helper type with static functions to generically adjust arguments for the \c data0 and
    \c data1 members of SimdArray and SimdMaskArray.

    \tparam secondOffset The offset in number of elements that \c data1 has in the SimdArray
                         / SimdMaskArray. This is essentially equal to the number of
                         elements in \c data0.
 */
template <std::size_t secondOffset> class Split
{
    // split composite SimdArray
    template <typename U, std::size_t N, typename V, std::size_t M,
              typename = enable_if<N != M>>
    static Vc_INTRINSIC auto loImpl(const SimdArray<U, N, V, M> &x)
        -> decltype(internal_data0(x))
    {
        return internal_data0(x);
    }
    template <typename U, std::size_t N, typename V, std::size_t M,
              typename = enable_if<N != M>>
    static Vc_INTRINSIC auto hiImpl(const SimdArray<U, N, V, M> &x)
        -> decltype(internal_data1(x))
    {
        return internal_data1(x);
    }
    template <typename U, std::size_t N, typename V, std::size_t M,
              typename = enable_if<N != M>>
    static Vc_INTRINSIC auto loImpl(SimdArray<U, N, V, M> *x)
        -> decltype(&internal_data0(*x))
    {
        return &internal_data0(*x);
    }
    template <typename U, std::size_t N, typename V, std::size_t M,
              typename = enable_if<N != M>>
    static Vc_INTRINSIC auto hiImpl(SimdArray<U, N, V, M> *x)
        -> decltype(&internal_data1(*x))
    {
        return &internal_data1(*x);
    }

    // split atomic SimdArray
    template <typename U, std::size_t N, typename V>
    static Vc_INTRINSIC Segment<V, 2, 0> loImpl(const SimdArray<U, N, V, N> &x)
    {
        return {internal_data(x)};
    }
    template <typename U, std::size_t N, typename V>
    static Vc_INTRINSIC Segment<V, 2, 1> hiImpl(const SimdArray<U, N, V, N> &x)
    {
        return {internal_data(x)};
    }
    template <typename U, std::size_t N, typename V>
    static Vc_INTRINSIC Segment<V *, 2, 0> loImpl(SimdArray<U, N, V, N> *x)
    {
        return {&internal_data(*x)};
    }
    template <typename U, std::size_t N, typename V>
    static Vc_INTRINSIC Segment<V *, 2, 1> hiImpl(SimdArray<U, N, V, N> *x)
    {
        return {&internal_data(*x)};
    }

    // split composite SimdMaskArray
    template <typename U, std::size_t N, typename V, std::size_t M>
    static Vc_INTRINSIC auto loImpl(const SimdMaskArray<U, N, V, M> &x) -> decltype(internal_data0(x))
    {
        return internal_data0(x);
    }
    template <typename U, std::size_t N, typename V, std::size_t M>
    static Vc_INTRINSIC auto hiImpl(const SimdMaskArray<U, N, V, M> &x) -> decltype(internal_data1(x))
    {
        return internal_data1(x);
    }

    template <typename U, std::size_t N, typename V>
    static Vc_INTRINSIC Segment<typename SimdMaskArray<U, N, V, N>::mask_type, 2, 0> loImpl(
        const SimdMaskArray<U, N, V, N> &x)
    {
        return {internal_data(x)};
    }
    template <typename U, std::size_t N, typename V>
    static Vc_INTRINSIC Segment<typename SimdMaskArray<U, N, V, N>::mask_type, 2, 1> hiImpl(
        const SimdMaskArray<U, N, V, N> &x)
    {
        return {internal_data(x)};
    }

    // split Vector<T> and Mask<T>
#ifdef Vc_IMPL_AVX
    template <class T>
    static Vc_INTRINSIC SSE::Vector<T> loImpl(Vector<T, VectorAbi::Avx> &&x)
    {
        return simd_cast<SSE::Vector<T>, 0>(x);
    }
    template <class T>
    static Vc_INTRINSIC SSE::Vector<T> hiImpl(Vector<T, VectorAbi::Avx> &&x)
    {
        return simd_cast<SSE::Vector<T>, 1>(x);
    }
    template <class T>
    static Vc_INTRINSIC SSE::Mask<T> loImpl(Mask<T, VectorAbi::Avx> &&x)
    {
        return simd_cast<SSE::Mask<T>, 0>(x);
    }
    template <class T>
    static Vc_INTRINSIC SSE::Mask<T> hiImpl(Mask<T, VectorAbi::Avx> &&x)
    {
        return simd_cast<SSE::Mask<T>, 1>(x);
    }
#endif // Vc_IMPL_AVX
    template <typename T>
    static constexpr bool is_vector_or_mask()
    {
        return (Traits::is_simd_vector<T>::value && !Traits::isSimdArray<T>::value) ||
               (Traits::is_simd_mask<T>::value && !Traits::isSimdMaskArray<T>::value);
    }
    template <typename V>
    static Vc_INTRINSIC Segment<V, 2, 0> loImpl(V &&x, enable_if<is_vector_or_mask<V>()> = nullarg)
    {
        return {std::forward<V>(x)};
    }
    template <typename V>
    static Vc_INTRINSIC Segment<V, 2, 1> hiImpl(V &&x, enable_if<is_vector_or_mask<V>()> = nullarg)
    {
        return {std::forward<V>(x)};
    }

    // split std::vector<T>
    template <class T, class A>
    static Vc_INTRINSIC const T *loImpl(const std::vector<T, A> &x)
    {
        return x.data();
    }
    template <class T, class A>
    static Vc_INTRINSIC const T *hiImpl(const std::vector<T, A> &x)
    {
        return x.data() + secondOffset;
    }

    // generically split Segments
    template <typename V, std::size_t Pieces, std::size_t Index>
    static Vc_INTRINSIC Segment<V, 2 * Pieces, 2 * Index> loImpl(
        const Segment<V, Pieces, Index> &x)
    {
        return {x.data};
    }
    template <typename V, std::size_t Pieces, std::size_t Index>
    static Vc_INTRINSIC Segment<V, 2 * Pieces, 2 * Index + 1> hiImpl(
        const Segment<V, Pieces, Index> &x)
    {
        return {x.data};
    }

    /** \internal
     * \name Checks for existence of \c loImpl / \c hiImpl
     */
    //@{
    template <typename T, typename = decltype(loImpl(std::declval<T>()))>
    static std::true_type have_lo_impl(int);
    template <typename T> static std::false_type have_lo_impl(float);
    template <typename T> static constexpr bool have_lo_impl()
    {
        return decltype(have_lo_impl<T>(1))::value;
    }

    template <typename T, typename = decltype(hiImpl(std::declval<T>()))>
    static std::true_type have_hi_impl(int);
    template <typename T> static std::false_type have_hi_impl(float);
    template <typename T> static constexpr bool have_hi_impl()
    {
        return decltype(have_hi_impl<T>(1))::value;
    }
    //@}

public:
    /** \internal
     * \name with Operations tag
     *
     * These functions don't overload on the data parameter. The first parameter (the tag) clearly
     * identifies the intended function.
     */
    //@{
    template <typename U>
    static Vc_INTRINSIC const U *lo(Operations::gather, const U *ptr)
    {
        return ptr;
    }
    template <typename U>
    static Vc_INTRINSIC const U *hi(Operations::gather, const U *ptr)
    {
        return ptr + secondOffset;
    }
    template <typename U, typename = enable_if<!std::is_pointer<U>::value>>
    static Vc_ALWAYS_INLINE decltype(loImpl(std::declval<U>()))
    lo(Operations::gather, U &&x)
    {
        return loImpl(std::forward<U>(x));
    }
    template <typename U, typename = enable_if<!std::is_pointer<U>::value>>
    static Vc_ALWAYS_INLINE decltype(hiImpl(std::declval<U>()))
    hi(Operations::gather, U &&x)
    {
        return hiImpl(std::forward<U>(x));
    }
    template <typename U>
    static Vc_INTRINSIC const U *lo(Operations::scatter, const U *ptr)
    {
        return ptr;
    }
    template <typename U>
    static Vc_INTRINSIC const U *hi(Operations::scatter, const U *ptr)
    {
        return ptr + secondOffset;
    }
    //@}

    /** \internal
        \name without Operations tag

        These functions are not clearly tagged as to where they are used and therefore
        behave differently depending on the type of the parameter. Different behavior is
        implemented via overloads of \c loImpl and \c hiImpl. They are not overloads of \c
        lo and \c hi directly because it's hard to compete against a universal reference
        (i.e. an overload for `int` requires overloads for `int &`, `const int &`, and `int
        &&`. If one of them were missing `U &&` would win in overload resolution).
     */
    //@{
    template <typename U>
    static Vc_ALWAYS_INLINE decltype(loImpl(std::declval<U>())) lo(U &&x)
    {
        return loImpl(std::forward<U>(x));
    }
    template <typename U>
    static Vc_ALWAYS_INLINE decltype(hiImpl(std::declval<U>())) hi(U &&x)
    {
        return hiImpl(std::forward<U>(x));
    }

    template <typename U>
    static Vc_ALWAYS_INLINE enable_if<!have_lo_impl<U>(), U> lo(U &&x)
    {
        return std::forward<U>(x);
    }
    template <typename U>
    static Vc_ALWAYS_INLINE enable_if<!have_hi_impl<U>(), U> hi(U &&x)
    {
        return std::forward<U>(x);
    }
    //@}
};
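
// A sketch of how Split is used (hypothetical SimdArray<float, 8> split into 4 + 4;
// this comment is not part of the original file): an operation on the whole array is
// applied piecewise as
//   op(data0, Split<4>::lo(args)...);  // elements 0..3, pointers unchanged
//   op(data1, Split<4>::hi(args)...);  // elements 4..7, pointers advanced by 4
// Arguments without a matching loImpl/hiImpl overload (e.g. a scalar broadcast value)
// are forwarded unchanged by the fallback overloads above.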

// actual_value {{{1
template <typename Op, typename U, std::size_t M, typename V>
static Vc_INTRINSIC const V &actual_value(Op, const SimdArray<U, M, V, M> &x)
{
    return internal_data(x);
}
template <typename Op, typename U, std::size_t M, typename V>
static Vc_INTRINSIC V *actual_value(Op, SimdArray<U, M, V, M> *x)
{
    return &internal_data(*x);
}
template <typename Op, typename T, size_t Pieces, size_t Index>
static Vc_INTRINSIC typename Segment<T, Pieces, Index>::fixed_size_type actual_value(
    Op, Segment<T, Pieces, Index> &&seg)
{
    return seg.to_fixed_size();
}

template <typename Op, typename U, std::size_t M, typename V>
static Vc_INTRINSIC const typename V::Mask &actual_value(Op, const SimdMaskArray<U, M, V, M> &x)
{
    return internal_data(x);
}
template <typename Op, typename U, std::size_t M, typename V>
static Vc_INTRINSIC typename V::Mask *actual_value(Op, SimdMaskArray<U, M, V, M> *x)
{
    return &internal_data(*x);
}

// unpackArgumentsAuto {{{1
/**\internal
 * \name unpackArgumentsAuto
 *
 * Search for the right amount of SimdArray "unpacking" (via actual_value) to match the
 * interface of the function to be called.
 *
 * The compiler can figure this out for us thanks to SFINAE. The approach is to have a
 * number \c I that determines the indexes of the arguments to be transformed via
 * actual_value. Each bit of \c I identifies an argument. unpackArgumentsAuto starts the
 * recursion with `I = 0`, i.e. no actual_value transformations. If the overload calling
 * \c op is unavailable due to a substitution failure \c I is incremented and the function
 * recurses. Otherwise there are two unpackArgumentsAutoImpl functions in the overload
 * set. The first argument (\c int / \c float) leads to a preference of the function
 * calling \c op, thus ending the recursion.
 */
///@{
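
// A worked example of the search (a sketch with two arguments a0 and a1; the names are
// hypothetical and this comment is not part of the original file): each bit of I picks,
// per argument, whether actual_value is applied or the argument is forwarded unchanged.
// The recursion therefore tries
//   I = 0, I = 1, I = 2, I = 3
// i.e. all four treatment combinations for (a0, a1), and stops at the first I for which
// the call `op(r, ...)` is well-formed.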

///\internal transforms \p arg via actual_value
template <typename Op, typename Arg>
Vc_INTRINSIC decltype(actual_value(std::declval<Op &>(), std::declval<Arg>()))
conditionalUnpack(std::true_type, Op op, Arg &&arg)
{
    return actual_value(op, std::forward<Arg>(arg));
}
///\internal forwards \p arg to its return value
template <typename Op, typename Arg>
Vc_INTRINSIC Arg conditionalUnpack(std::false_type, Op, Arg &&arg)
{
    return std::forward<Arg>(arg);
}

///\internal true-/false_type that selects whether the argument with index B should be unpacked
template <size_t A, size_t B>
struct selectorType : public std::integral_constant<bool, !((A & (size_t(1) << B)) != 0)> {
};

///\internal ends the recursion, transforms arguments, and calls \p op
template <size_t I, typename Op, typename R, typename... Args, size_t... Indexes>
Vc_INTRINSIC decltype(std::declval<Op &>()(std::declval<R &>(),
                                           conditionalUnpack(selectorType<I, Indexes>(),
                                                             std::declval<Op &>(),
                                                             std::declval<Args>())...))
unpackArgumentsAutoImpl(int, index_sequence<Indexes...>, Op op, R &&r, Args &&... args)
{
    op(std::forward<R>(r),
       conditionalUnpack(selectorType<I, Indexes>(), op, std::forward<Args>(args))...);
}

///\internal the current actual_value calls don't work: recurse to I + 1
template <size_t I, typename Op, typename R, typename... Args, size_t... Indexes>
Vc_INTRINSIC enable_if<(I <= (size_t(1) << sizeof...(Args))), void> unpackArgumentsAutoImpl(
    float, index_sequence<Indexes...> is, Op op, R &&r, Args &&... args)
{
    // if R is nullptr_t then the return type cannot enforce that actually any unwrapping
    // of the SimdArray types happens. Thus, you could get an endless loop of the
    // SimdArray function overload calling itself, if the index goes up to (1 <<
    // sizeof...(Args)) - 1 (which means no argument transformations via actual_value).
    static_assert(
        I < (1 << sizeof...(Args)) - (std::is_same<R, std::nullptr_t>::value ? 1 : 0),
        "Vc or compiler bug. Please report. Failed to find a combination of "
        "actual_value(arg) transformations that allows calling Op.");
    unpackArgumentsAutoImpl<I + 1, Op, R, Args...>(int(), is, op, std::forward<R>(r),
                                                   std::forward<Args>(args)...);
}

#ifdef Vc_ICC
template <size_t, typename... Ts> struct IccWorkaround {
    using type = void;
};
template <typename... Ts> struct IccWorkaround<2, Ts...> {
    using type = typename std::remove_pointer<typename std::decay<
        typename std::tuple_element<1, std::tuple<Ts...>>::type>::type>::type;
};
#endif

///\internal The interface to start the machinery.
template <typename Op, typename R, typename... Args>
Vc_INTRINSIC void unpackArgumentsAuto(Op op, R &&r, Args &&... args)
{
#ifdef Vc_ICC
    // ugly hacky workaround for ICC:
    // The compiler fails to do SFINAE right on recursion. We have to hit the right
    // recursionStart number from the start.
    const int recursionStart =
        Traits::isSimdArray<
            typename IccWorkaround<sizeof...(Args), Args...>::type>::value &&
                (std::is_same<Op, Common::Operations::Forward_frexp>::value ||
                 std::is_same<Op, Common::Operations::Forward_ldexp>::value)
            ? 2
            : 0;
#else
    const int recursionStart = 0;
#endif
    unpackArgumentsAutoImpl<recursionStart>(
        int(), make_index_sequence<sizeof...(Args)>(), op, std::forward<R>(r),
        std::forward<Args>(args)...);
}
///@}

//}}}1
///@}
} // namespace Common
} // namespace Vc

#endif // VC_COMMON_SIMDARRAYHELPER_H_

// vim: foldmethod=marker

File diff suppressed because it is too large

@ -0,0 +1,719 @@
|
|||
/* This file is part of the Vc library. {{{
|
||||
Copyright © 2013-2015 Matthias Kretz <kretz@kde.org>
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the names of contributing organizations nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
}}}*/
|
||||
|
||||
#ifndef VC_COMMON_SIMDMASKARRAY_H_
|
||||
#define VC_COMMON_SIMDMASKARRAY_H_
|
||||
|
||||
#include <type_traits>
|
||||
#include <array>
|
||||
#include "simdarrayhelper.h"
|
||||
#include "utility.h"
|
||||
#include "maskbool.h"
|
||||
|
||||
#include "macros.h"
|
||||
|
||||
namespace Vc_VERSIONED_NAMESPACE
|
||||
{
|
||||
/// \addtogroup SimdArray
|
||||
/// @{
|
||||
// atomic SimdMaskArray {{{1
|
||||
/**\internal
|
||||
* Specialization of `SimdMaskArray<T, N, VectorType, VectorSize>` for the case where `N
|
||||
* == VectorSize`.
|
||||
*
|
||||
* This is specialized for implementation purposes: Since the general implementation uses
|
||||
* two SimdMaskArray data members it recurses over different SimdMaskArray instantiations.
|
||||
* The recursion is ended by this specialization, which has a single \p storage_type data
|
||||
* member to which all functions are forwarded more or less directly.
|
||||
*/
|
||||
template <typename T, std::size_t N, typename VectorType_>
|
||||
class SimdMaskArray<T, N, VectorType_, N>
|
||||
{
|
||||
public:
|
||||
using VectorType = VectorType_;
|
||||
using vector_type = VectorType;
|
||||
using mask_type = typename vector_type::Mask;
|
||||
using storage_type = mask_type;
|
||||
|
||||
friend storage_type &internal_data(SimdMaskArray &m) { return m.data; }
|
||||
friend const storage_type &internal_data(const SimdMaskArray &m) { return m.data; }
|
||||
|
||||
static constexpr std::size_t size() { return N; }
|
||||
static constexpr std::size_t Size = size();
|
||||
static constexpr std::size_t MemoryAlignment = storage_type::MemoryAlignment;
|
||||
static_assert(Size == vector_type::Size, "size mismatch");
|
||||
|
||||
using vectorentry_type = typename mask_type::VectorEntryType;
|
||||
using value_type = typename mask_type::EntryType;
|
||||
using Mask = mask_type;
|
||||
using VectorEntryType = vectorentry_type;
|
||||
using EntryType = value_type;
|
||||
using EntryReference = Vc::Detail::ElementReference<storage_type, SimdMaskArray>;
|
||||
using reference = EntryReference;
|
||||
using Vector = fixed_size_simd<T, N>;
|
||||
|
||||
Vc_FREE_STORE_OPERATORS_ALIGNED(alignof(mask_type));
|
||||
|
||||
// zero init
|
||||
SimdMaskArray() = default;
|
||||
|
||||
// default copy ctor/operator
|
||||
SimdMaskArray(const SimdMaskArray &) = default;
|
||||
SimdMaskArray(SimdMaskArray &&) = default;
|
||||
SimdMaskArray &operator=(const SimdMaskArray &) = default;
|
||||
SimdMaskArray &operator=(SimdMaskArray &&) = default;
|
||||
|
||||
// broadcasts
|
||||
Vc_INTRINSIC explicit SimdMaskArray(VectorSpecialInitializerOne one) : data(one) {}
|
||||
Vc_INTRINSIC explicit SimdMaskArray(VectorSpecialInitializerZero zero) : data(zero) {}
|
||||
Vc_INTRINSIC explicit SimdMaskArray(bool b) : data(b) {}
|
||||
Vc_INTRINSIC static SimdMaskArray Zero() { return {private_init, storage_type::Zero()}; }
|
||||
Vc_INTRINSIC static SimdMaskArray One() { return {private_init, storage_type::One()}; }
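// Editor's sketch (not part of the original header): the broadcast
// constructors and the named factories above all fill every element with the
// same boolean value. Assuming a fixed-size mask of four float elements:
//
//   using M = Vc::fixed_size_simd_mask<float, 4>;
//   M all(true);          // every element true
//   M none = M::Zero();   // every element false
//   // all.isFull() and none.isEmpty() both hold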
|
||||
|
||||
// conversion (casts); implemented in simd_cast_caller.tcc
|
||||
template <class U, class V, class = enable_if<N == V::Size>>
|
||||
Vc_INTRINSIC_L SimdMaskArray(const SimdMaskArray<U, N, V> &x) Vc_INTRINSIC_R;
|
||||
template <class U, class V, class = enable_if<(N > V::Size && N <= 2 * V::Size)>,
|
||||
class = U>
|
||||
Vc_INTRINSIC_L SimdMaskArray(const SimdMaskArray<U, N, V> &x) Vc_INTRINSIC_R;
|
||||
template <class U, class V, class = enable_if<(N > 2 * V::Size && N <= 4 * V::Size)>,
|
||||
class = U, class = U>
|
||||
Vc_INTRINSIC_L SimdMaskArray(const SimdMaskArray<U, N, V> &x) Vc_INTRINSIC_R;
|
||||
|
||||
// conversion from any Segment object (could be SimdMaskArray or Mask<T>)
|
||||
template <typename M, std::size_t Pieces, std::size_t Index>
|
||||
Vc_INTRINSIC_L SimdMaskArray(
|
||||
Common::Segment<M, Pieces, Index> &&x,
|
||||
enable_if<Traits::simd_vector_size<M>::value == Size * Pieces> = nullarg) Vc_INTRINSIC_R;
|
||||
|
||||
// conversion from Mask<T>
|
||||
template <class M, class = enable_if<(Traits::is_simd_mask<M>::value &&
|
||||
!Traits::isSimdMaskArray<M>::value &&
|
||||
Traits::simd_vector_size<M>::value == Size)>>
|
||||
Vc_INTRINSIC_L SimdMaskArray(M k) Vc_INTRINSIC_R;
|
||||
|
||||
// implicit conversion to Mask<U, AnyAbi> if Mask<U, AnyAbi>::size() == N
|
||||
template <class U, class A,
|
||||
class = enable_if<Vc::Mask<U, A>::Size == N &&
|
||||
!detail::is_fixed_size_abi<A>::value>>
|
||||
operator Vc::Mask<U, A>() const
|
||||
{
|
||||
return simd_cast<Vc::Mask<U, A>>(data);
|
||||
}
|
||||
operator fixed_size_simd_mask<T, N> &()
|
||||
{
|
||||
return static_cast<fixed_size_simd_mask<T, N> &>(*this);
|
||||
}
|
||||
operator const fixed_size_simd_mask<T, N> &() const
|
||||
{
|
||||
return static_cast<const fixed_size_simd_mask<T, N> &>(*this);
|
||||
}
|
||||
|
||||
// load/store (from/to bool arrays)
|
||||
template <typename Flags = DefaultLoadTag>
|
||||
Vc_INTRINSIC explicit SimdMaskArray(const bool *mem, Flags f = Flags())
|
||||
: data(mem, f)
|
||||
{
|
||||
}
|
||||
|
||||
Vc_INTRINSIC void load(const bool *mem) { data.load(mem); }
|
||||
template <typename Flags> Vc_INTRINSIC void load(const bool *mem, Flags f)
|
||||
{
|
||||
data.load(mem, f);
|
||||
}
|
||||
|
||||
Vc_INTRINSIC void store(bool *mem) const { data.store(mem); }
|
||||
template <typename Flags> Vc_INTRINSIC void store(bool *mem, Flags f) const
|
||||
{
|
||||
data.store(mem, f);
|
||||
}
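// Example (editor's sketch): a load/store round trip through a plain bool
// array; the element count 4 and the Vc::Unaligned flag are illustrative
// assumptions.
//
//   bool mem[4] = {true, false, true, false};
//   Vc::fixed_size_simd_mask<float, 4> k(mem);   // load on construction
//   k.store(mem, Vc::Unaligned);                 // write the same values back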
|
||||
|
||||
// compares
|
||||
Vc_INTRINSIC Vc_PURE bool operator==(const SimdMaskArray &rhs) const
|
||||
{
|
||||
return data == rhs.data;
|
||||
}
|
||||
Vc_INTRINSIC Vc_PURE bool operator!=(const SimdMaskArray &rhs) const
|
||||
{
|
||||
return data != rhs.data;
|
||||
}
|
||||
|
||||
// inversion
|
||||
Vc_INTRINSIC Vc_PURE fixed_size_simd_mask<T, N> operator!() const
|
||||
{
|
||||
return {private_init, !data};
|
||||
}
|
||||
|
||||
// binary operators
|
||||
Vc_INTRINSIC SimdMaskArray &operator&=(const SimdMaskArray &rhs)
|
||||
{
|
||||
data &= rhs.data;
|
||||
return *this;
|
||||
}
|
||||
Vc_INTRINSIC SimdMaskArray &operator|=(const SimdMaskArray &rhs)
|
||||
{
|
||||
data |= rhs.data;
|
||||
return *this;
|
||||
}
|
||||
Vc_INTRINSIC SimdMaskArray &operator^=(const SimdMaskArray &rhs)
|
||||
{
|
||||
data ^= rhs.data;
|
||||
return *this;
|
||||
}
|
||||
|
||||
Vc_INTRINSIC Vc_PURE fixed_size_simd_mask<T, N> operator&(
|
||||
const SimdMaskArray &rhs) const
|
||||
{
|
||||
return {private_init, data & rhs.data};
|
||||
}
|
||||
Vc_INTRINSIC Vc_PURE fixed_size_simd_mask<T, N> operator|(
|
||||
const SimdMaskArray &rhs) const
|
||||
{
|
||||
return {private_init, data | rhs.data};
|
||||
}
|
||||
Vc_INTRINSIC Vc_PURE fixed_size_simd_mask<T, N> operator^(
|
||||
const SimdMaskArray &rhs) const
|
||||
{
|
||||
return {private_init, data ^ rhs.data};
|
||||
}
|
||||
|
||||
Vc_INTRINSIC Vc_PURE fixed_size_simd_mask<T, N> operator&&(
|
||||
const SimdMaskArray &rhs) const
|
||||
{
|
||||
return {private_init, data && rhs.data};
|
||||
}
|
||||
Vc_INTRINSIC Vc_PURE fixed_size_simd_mask<T, N> operator||(
|
||||
const SimdMaskArray &rhs) const
|
||||
{
|
||||
return {private_init, data || rhs.data};
|
||||
}
|
||||
|
||||
Vc_INTRINSIC Vc_PURE bool isFull() const { return data.isFull(); }
|
||||
Vc_INTRINSIC Vc_PURE bool isNotEmpty() const { return data.isNotEmpty(); }
|
||||
Vc_INTRINSIC Vc_PURE bool isEmpty() const { return data.isEmpty(); }
|
||||
Vc_INTRINSIC Vc_PURE bool isMix() const { return data.isMix(); }
|
||||
|
||||
Vc_INTRINSIC Vc_PURE int shiftMask() const { return data.shiftMask(); }
|
||||
|
||||
Vc_INTRINSIC Vc_PURE int toInt() const { return data.toInt(); }
|
||||
|
||||
private:
|
||||
friend reference;
|
||||
static Vc_INTRINSIC value_type get(const storage_type &k, int i) noexcept
|
||||
{
|
||||
return k[i];
|
||||
}
|
||||
template <typename U>
|
||||
static Vc_INTRINSIC void set(storage_type &k, int i, U &&v) noexcept(
|
||||
noexcept(std::declval<storage_type &>()[0] = std::declval<U>()))
|
||||
{
|
||||
k[i] = std::forward<U>(v);
|
||||
}
|
||||
|
||||
public:
|
||||
/**
|
||||
* \note the returned object models the concept of a reference and
|
||||
* as such it can exist longer than the data it is referencing.
|
||||
* \note to avoid lifetime issues, we strongly advise not to store
|
||||
* any reference objects.
|
||||
*/
|
||||
Vc_INTRINSIC Vc_PURE reference operator[](size_t index) noexcept
|
||||
{
|
||||
return {data, int(index)};
|
||||
}
|
||||
Vc_INTRINSIC Vc_PURE value_type operator[](size_t index) const noexcept
|
||||
{
|
||||
return data[index];
|
||||
}
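// Example (editor's sketch) of writing through the smart reference; per the
// note above, the returned reference object should not be stored.
//
//   Vc::fixed_size_simd_mask<float, 4> k(false);
//   k[2] = true;     // assigns through the temporary reference
//   // k[2] now reads true and k.count() == 1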
|
||||
|
||||
Vc_INTRINSIC Vc_PURE int count() const { return data.count(); }
|
||||
|
||||
/**
|
||||
* Returns the index of the first one in the mask.
|
||||
*
|
||||
* The return value is undefined if the mask is empty.
|
||||
*/
|
||||
Vc_INTRINSIC Vc_PURE int firstOne() const { return data.firstOne(); }
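// Because the result is undefined for an empty mask, guard calls to
// firstOne() (editor's sketch):
//
//   if (!k.isEmpty()) {
//       const int i = k.firstOne();  // index of the lowest true element
//   }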
|
||||
|
||||
template <typename G>
|
||||
static Vc_INTRINSIC fixed_size_simd_mask<T, N> generate(const G &gen)
|
||||
{
|
||||
return {private_init, mask_type::generate(gen)};
|
||||
}
|
||||
|
||||
Vc_INTRINSIC Vc_PURE fixed_size_simd_mask<T, N> shifted(int amount) const
|
||||
{
|
||||
return {private_init, data.shifted(amount)};
|
||||
}
|
||||
|
||||
/// \internal execute specified Operation
|
||||
template <typename Op, typename... Args>
|
||||
static Vc_INTRINSIC fixed_size_simd_mask<T, N> fromOperation(Op op, Args &&... args)
|
||||
{
|
||||
fixed_size_simd_mask<T, N> r;
|
||||
Common::unpackArgumentsAuto(op, r.data, std::forward<Args>(args)...);
|
||||
return r;
|
||||
}
|
||||
|
||||
/// \internal
|
||||
Vc_INTRINSIC SimdMaskArray(private_init_t, mask_type &&x) : data(std::move(x)) {}
|
||||
|
||||
private:
|
||||
// The alignas attribute attached to the class declaration above is ignored by ICC
|
||||
// 17.0.0 (at least). So just move the alignas attribute down here where it works for
|
||||
// all compilers.
|
||||
alignas(static_cast<std::size_t>(
|
||||
Common::BoundedAlignment<Common::NextPowerOfTwo<N>::value * sizeof(VectorType_) /
|
||||
VectorType_::size()>::value)) storage_type data;
|
||||
};
|
||||
|
||||
template <typename T, std::size_t N, typename VectorType> constexpr std::size_t SimdMaskArray<T, N, VectorType, N>::Size;
|
||||
template <typename T, std::size_t N, typename VectorType>
|
||||
constexpr std::size_t SimdMaskArray<T, N, VectorType, N>::MemoryAlignment;
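// Editor's note: this specialization makes a fixed_size_simd_mask whose N
// equals the native mask width a zero-overhead wrapper around that native
// mask. A sketch (using the native width as N is the point of the example):
//
//   using M = Vc::fixed_size_simd_mask<float, Vc::float_v::size()>;
//   M k(true);  // all operations forward directly to the native mask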
|
||||
|
||||
// generic SimdMaskArray {{{1
|
||||
/**
|
||||
* Data-parallel mask type with user-defined number of boolean elements.
|
||||
*
|
||||
* \tparam T The value type of the corresponding SimdArray. Depending on the target
|
||||
* platform this type determines the bit representation that works most
|
||||
* efficiently with SimdArray types instantiated for \p T.
|
||||
*
|
||||
* \tparam N The number of boolean elements to store and process concurrently. You can
|
||||
* choose an arbitrary number, though not every number is a good idea.
|
||||
* Generally, a power of two, or the sum of two powers of two, might
|
||||
* work efficiently, though this depends a lot on the target system.
|
||||
*
|
||||
* \tparam V Don't change the default value unless you really know what you are doing.
|
||||
* This type is set to the underlying native Vc::Vector type used in the
|
||||
* implementation of the type.
|
||||
* Having it as part of the type name guards against some cases of ODR
|
||||
* violations (i.e. linking incompatible translation units / libraries).
|
||||
*
|
||||
* \tparam Wt Don't ever change the default value.
|
||||
* This parameter is an unfortunate implementation detail shining through.
|
||||
*
|
||||
* \headerfile simdmaskarray.h <Vc/SimdArray>
|
||||
*/
|
||||
template <typename T, size_t N, typename V, size_t Wt>
|
||||
class SimdMaskArray
|
||||
{
|
||||
static constexpr std::size_t N0 = Common::left_size<N>();
|
||||
|
||||
using Split = Common::Split<N0>;
|
||||
|
||||
public:
|
||||
using storage_type0 = fixed_size_simd_mask<T, N0>;
|
||||
using storage_type1 = fixed_size_simd_mask<T, N - N0>;
|
||||
static_assert(storage_type0::size() == N0, "");
|
||||
|
||||
using vector_type = fixed_size_simd<T, N>;
|
||||
|
||||
friend storage_type0 &internal_data0(SimdMaskArray &m) { return m.data0; }
|
||||
friend storage_type1 &internal_data1(SimdMaskArray &m) { return m.data1; }
|
||||
friend const storage_type0 &internal_data0(const SimdMaskArray &m) { return m.data0; }
|
||||
friend const storage_type1 &internal_data1(const SimdMaskArray &m) { return m.data1; }
|
||||
|
||||
using mask_type = SimdMaskArray;
|
||||
|
||||
///\copydoc Mask::size()
|
||||
static constexpr std::size_t size() { return N; }
|
||||
///\copydoc Mask::Size
|
||||
static constexpr std::size_t Size = size();
|
||||
///\copydoc Mask::MemoryAlignment
|
||||
static constexpr std::size_t MemoryAlignment =
|
||||
storage_type0::MemoryAlignment > storage_type1::MemoryAlignment
|
||||
? storage_type0::MemoryAlignment
|
||||
: storage_type1::MemoryAlignment;
|
||||
static_assert(Size == vector_type::Size, "size mismatch");
|
||||
|
||||
///\internal
|
||||
using vectorentry_type = typename storage_type0::VectorEntryType;
|
||||
|
||||
///\copydoc Mask::value_type
|
||||
using value_type = typename storage_type0::EntryType;
|
||||
///\copydoc Mask::Mask
|
||||
using MaskType = mask_type;
|
||||
///\copydoc Mask::VectorEntryType
|
||||
using VectorEntryType = vectorentry_type;
|
||||
///\copydoc Mask::EntryType
|
||||
using EntryType = value_type;
|
||||
///\copydoc Mask::EntryReference
|
||||
using EntryReference = Vc::Detail::ElementReference<SimdMaskArray>;
|
||||
using reference = EntryReference;
|
||||
/// An alias for the corresponding SimdArray type.
|
||||
using Vector = fixed_size_simd<T, N>;
|
||||
|
||||
Vc_FREE_STORE_OPERATORS_ALIGNED(alignof(mask_type));
|
||||
|
||||
// zero init
|
||||
///\copydoc Mask::Mask()
|
||||
SimdMaskArray() = default;
|
||||
|
||||
// default copy ctor/operator
|
||||
SimdMaskArray(const SimdMaskArray &) = default;
|
||||
SimdMaskArray(SimdMaskArray &&) = default;
|
||||
SimdMaskArray &operator=(const SimdMaskArray &) = default;
|
||||
SimdMaskArray &operator=(SimdMaskArray &&) = default;
|
||||
|
||||
// implicit conversion from SimdMaskArray with same N
|
||||
template <typename U, typename W>
|
||||
Vc_INTRINSIC SimdMaskArray(const SimdMaskArray<U, N, W> &rhs)
|
||||
: data0(Split::lo(rhs)), data1(Split::hi(rhs))
|
||||
{
|
||||
}
|
||||
|
||||
// conversion from any Segment object (could be SimdMaskArray or Mask<T>)
|
||||
template <typename M, std::size_t Pieces, std::size_t Index>
|
||||
Vc_INTRINSIC SimdMaskArray(
|
||||
Common::Segment<M, Pieces, Index> &&rhs,
|
||||
enable_if<Traits::simd_vector_size<M>::value == Size * Pieces> = nullarg)
|
||||
: data0(Split::lo(rhs)), data1(Split::hi(rhs))
|
||||
{
|
||||
}
|
||||
|
||||
// conversion from Mask<T>
|
||||
template <class M, class = enable_if<(Traits::is_simd_mask<M>::value &&
|
||||
!Traits::isSimdMaskArray<M>::value &&
|
||||
Traits::simd_vector_size<M>::value == Size)>>
|
||||
Vc_INTRINSIC SimdMaskArray(M k) : data0(Split::lo(k)), data1(Split::hi(k))
|
||||
{
|
||||
}
|
||||
|
||||
// implicit conversion to Mask<U, AnyAbi> if Mask<U, AnyAbi>::size() == N
|
||||
template <class U, class A,
|
||||
class = enable_if<Vc::Mask<U, A>::Size == N &&
|
||||
!detail::is_fixed_size_abi<A>::value>>
|
||||
operator Vc::Mask<U, A>() const
|
||||
{
|
||||
return simd_cast<Vc::Mask<U, A>>(data0, data1);
|
||||
}
|
||||
Vc_INTRINSIC operator fixed_size_simd_mask<T, N> &()
|
||||
{
|
||||
return static_cast<fixed_size_simd_mask<T, N> &>(*this);
|
||||
}
|
||||
Vc_INTRINSIC operator const fixed_size_simd_mask<T, N> &() const
|
||||
{
|
||||
return static_cast<const fixed_size_simd_mask<T, N> &>(*this);
|
||||
}
|
||||
|
||||
///\copybrief Mask::Mask(VectorSpecialInitializerOne)
|
||||
Vc_INTRINSIC explicit SimdMaskArray(VectorSpecialInitializerOne one)
|
||||
: data0(one), data1(one)
|
||||
{
|
||||
}
|
||||
///\copybrief Mask::Mask(VectorSpecialInitializerZero)
|
||||
Vc_INTRINSIC explicit SimdMaskArray(VectorSpecialInitializerZero zero)
|
||||
: data0(zero), data1(zero)
|
||||
{
|
||||
}
|
||||
///\copydoc Mask::Mask(bool)
|
||||
Vc_INTRINSIC explicit SimdMaskArray(bool b) : data0(b), data1(b) {}
|
||||
|
||||
///\copydoc Mask::Zero()
|
||||
Vc_INTRINSIC static fixed_size_simd_mask<T, N> Zero()
|
||||
{
|
||||
return {storage_type0::Zero(), storage_type1::Zero()};
|
||||
}
|
||||
///\copydoc Mask::One()
|
||||
Vc_INTRINSIC static fixed_size_simd_mask<T, N> One()
|
||||
{
|
||||
return {storage_type0::One(), storage_type1::One()};
|
||||
}
|
||||
|
||||
///\name Loads & Stores
|
||||
///@{
|
||||
|
||||
/**
|
||||
* Load N boolean values from the consecutive addresses starting at \p mem.
|
||||
*
|
||||
* \param mem A pointer to an array of booleans.
|
||||
* \param f A combination of flags to modify specific behavior of the load.
|
||||
*/
|
||||
template <typename Flags = DefaultLoadTag>
|
||||
Vc_INTRINSIC explicit SimdMaskArray(const bool *mem, Flags f = Flags())
|
||||
: data0(mem, f), data1(mem + storage_type0::size(), f)
|
||||
{
|
||||
}
|
||||
|
||||
/**
|
||||
* Load N boolean values from the consecutive addresses starting at \p mem.
|
||||
*
|
||||
* \param mem A pointer to an array of booleans.
|
||||
*/
|
||||
Vc_INTRINSIC void load(const bool *mem)
|
||||
{
|
||||
data0.load(mem);
|
||||
data1.load(mem + storage_type0::size());
|
||||
}
|
||||
|
||||
/**
|
||||
* Load N boolean values from the consecutive addresses starting at \p mem.
|
||||
*
|
||||
* \param mem A pointer to an array of booleans.
|
||||
* \param f A combination of flags to modify specific behavior of the load.
|
||||
*/
|
||||
template <typename Flags> Vc_INTRINSIC void load(const bool *mem, Flags f)
|
||||
{
|
||||
data0.load(mem, f);
|
||||
data1.load(mem + storage_type0::size(), f);
|
||||
}
|
||||
|
||||
/**
|
||||
* Store N boolean values to the consecutive addresses starting at \p mem.
|
||||
*
|
||||
* \param mem A pointer to an array of booleans.
|
||||
*/
|
||||
Vc_INTRINSIC void store(bool *mem) const
|
||||
{
|
||||
data0.store(mem);
|
||||
data1.store(mem + storage_type0::size());
|
||||
}
|
||||
|
||||
/**
|
||||
* Store N boolean values to the consecutive addresses starting at \p mem.
|
||||
*
|
||||
* \param mem A pointer to an array of booleans.
|
||||
* \param f A combination of flags to modify specific behavior of the store.
|
||||
*/
|
||||
template <typename Flags> Vc_INTRINSIC void store(bool *mem, Flags f) const
|
||||
{
|
||||
data0.store(mem, f);
|
||||
data1.store(mem + storage_type0::size(), f);
|
||||
}
|
||||
///@}
|
||||
|
||||
///\copydoc Mask::operator==
|
||||
Vc_INTRINSIC Vc_PURE bool operator==(const SimdMaskArray &mask) const
|
||||
{
|
||||
return data0 == mask.data0 && data1 == mask.data1;
|
||||
}
|
||||
///\copydoc Mask::operator!=
|
||||
Vc_INTRINSIC Vc_PURE bool operator!=(const SimdMaskArray &mask) const
|
||||
{
|
||||
return data0 != mask.data0 || data1 != mask.data1;
|
||||
}
|
||||
|
||||
///\copybrief Mask::operator!
|
||||
Vc_INTRINSIC Vc_PURE fixed_size_simd_mask<T, N> operator!() const
|
||||
{
|
||||
return {!data0, !data1};
|
||||
}
|
||||
|
||||
///\copybrief Mask::operator&=
|
||||
Vc_INTRINSIC SimdMaskArray &operator&=(const SimdMaskArray &rhs)
|
||||
{
|
||||
data0 &= rhs.data0;
|
||||
data1 &= rhs.data1;
|
||||
return *this;
|
||||
}
|
||||
///\copybrief Mask::operator|=
|
||||
Vc_INTRINSIC SimdMaskArray &operator|=(const SimdMaskArray &rhs)
|
||||
{
|
||||
data0 |= rhs.data0;
|
||||
data1 |= rhs.data1;
|
||||
return *this;
|
||||
}
|
||||
///\copybrief Mask::operator^=
|
||||
Vc_INTRINSIC SimdMaskArray &operator^=(const SimdMaskArray &rhs)
|
||||
{
|
||||
data0 ^= rhs.data0;
|
||||
data1 ^= rhs.data1;
|
||||
return *this;
|
||||
}
|
||||
|
||||
///\copybrief Mask::operator&
|
||||
Vc_INTRINSIC Vc_PURE fixed_size_simd_mask<T, N> operator&(
|
||||
const SimdMaskArray &rhs) const
|
||||
{
|
||||
return {data0 & rhs.data0, data1 & rhs.data1};
|
||||
}
|
||||
///\copybrief Mask::operator|
|
||||
Vc_INTRINSIC Vc_PURE fixed_size_simd_mask<T, N> operator|(
|
||||
const SimdMaskArray &rhs) const
|
||||
{
|
||||
return {data0 | rhs.data0, data1 | rhs.data1};
|
||||
}
|
||||
///\copybrief Mask::operator^
|
||||
Vc_INTRINSIC Vc_PURE fixed_size_simd_mask<T, N> operator^(
|
||||
const SimdMaskArray &rhs) const
|
||||
{
|
||||
return {data0 ^ rhs.data0, data1 ^ rhs.data1};
|
||||
}
|
||||
|
||||
///\copybrief Mask::operator&&
|
||||
Vc_INTRINSIC Vc_PURE fixed_size_simd_mask<T, N> operator&&(
|
||||
const SimdMaskArray &rhs) const
|
||||
{
|
||||
return {data0 && rhs.data0, data1 && rhs.data1};
|
||||
}
|
||||
///\copybrief Mask::operator||
|
||||
Vc_INTRINSIC Vc_PURE fixed_size_simd_mask<T, N> operator||(
|
||||
const SimdMaskArray &rhs) const
|
||||
{
|
||||
return {data0 || rhs.data0, data1 || rhs.data1};
|
||||
}
|
||||
|
||||
///\copybrief Mask::isFull
|
||||
Vc_INTRINSIC Vc_PURE bool isFull() const { return data0.isFull() && data1.isFull(); }
|
||||
///\copybrief Mask::isNotEmpty
|
||||
Vc_INTRINSIC Vc_PURE bool isNotEmpty() const { return data0.isNotEmpty() || data1.isNotEmpty(); }
|
||||
///\copybrief Mask::isEmpty
|
||||
Vc_INTRINSIC Vc_PURE bool isEmpty() const { return data0.isEmpty() && data1.isEmpty(); }
|
||||
///\copybrief Mask::isMix
|
||||
Vc_INTRINSIC Vc_PURE bool isMix() const { return !isFull() && !isEmpty(); }
|
||||
|
||||
///\copydoc Mask::toInt
|
||||
Vc_INTRINSIC Vc_PURE int toInt() const
|
||||
{
|
||||
return data0.toInt() | (data1.toInt() << data0.size());
|
||||
}
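// Worked example (editor's illustration): for N == 6 split as 4 + 2 with
// elements {1,0,1,1, 0,1}, data0.toInt() == 0b1101 and data1.toInt() == 0b10,
// so toInt() == 0b1101 | (0b10 << 4) == 0b101101.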
|
||||
|
||||
private:
|
||||
friend reference;
|
||||
static Vc_INTRINSIC value_type get(const SimdMaskArray &o, int i) noexcept
|
||||
{
|
||||
if (i < int(o.data0.size())) {
|
||||
return o.data0[i];
|
||||
} else {
|
||||
return o.data1[i - o.data0.size()];
|
||||
}
|
||||
}
|
||||
template <typename U>
|
||||
static Vc_INTRINSIC void set(SimdMaskArray &o, int i, U &&v) noexcept(
|
||||
noexcept(std::declval<storage_type0 &>()[0] = std::declval<U>()) &&
|
||||
noexcept(std::declval<storage_type1 &>()[0] = std::declval<U>()))
|
||||
{
|
||||
if (i < int(o.data0.size())) {
|
||||
o.data0[i] = std::forward<U>(v);
|
||||
} else {
|
||||
o.data1[i - o.data0.size()] = std::forward<U>(v);
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
/**
|
||||
* Return a smart reference to the boolean element at index \p index.
|
||||
*
|
||||
* \param index The element index to be accessed.
|
||||
*
|
||||
* \returns A temporary smart reference object which acts as much as an lvalue
|
||||
* reference as possible.
|
||||
*/
|
||||
Vc_INTRINSIC Vc_PURE reference operator[](size_t index) noexcept
|
||||
{
|
||||
return {*this, int(index)};
|
||||
}
|
||||
/**
|
||||
* Return a copy of the boolean element at index \p index.
|
||||
*
|
||||
* \param index The element index to be accessed.
|
||||
*
|
||||
* \returns A temporary boolean object with the value of the element at index \p
|
||||
* index.
|
||||
*/
|
||||
Vc_INTRINSIC Vc_PURE value_type operator[](size_t index) const noexcept
|
||||
{
|
||||
return get(*this, index);
|
||||
}
|
||||
|
||||
///\copybrief Mask::count
|
||||
Vc_INTRINSIC Vc_PURE int count() const { return data0.count() + data1.count(); }
|
||||
|
||||
///\copydoc Mask::firstOne
|
||||
Vc_INTRINSIC Vc_PURE int firstOne() const {
|
||||
if (data0.isEmpty()) {
|
||||
return data1.firstOne() + storage_type0::size();
|
||||
}
|
||||
return data0.firstOne();
|
||||
}
|
||||
|
||||
///\copybrief Mask::generate
|
||||
template <typename G>
|
||||
static Vc_INTRINSIC fixed_size_simd_mask<T, N> generate(const G &gen)
|
||||
{
|
||||
return {storage_type0::generate(gen),
|
||||
storage_type1::generate([&](std::size_t i) { return gen(i + N0); })};
|
||||
}
|
||||
|
||||
///\copybrief Mask::shifted
|
||||
inline Vc_PURE fixed_size_simd_mask<T, N> shifted(int amount) const
|
||||
{
|
||||
if (Vc_IS_UNLIKELY(amount == 0)) {
|
||||
return *this;
|
||||
}
|
||||
return generate([&](unsigned i) {
|
||||
// modulo arithmetic of unsigned makes the check for j >= 0 unnecessary
|
||||
const unsigned j = i + amount;
|
||||
return j < size() ? get(*this, j) : false;
|
||||
});
|
||||
}
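// Example (editor's sketch): shifting {1,1,0,0} by 1 yields {1,0,0,0};
// element i of the result is element i + amount of the source, and positions
// shifted in from beyond either end read as false.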
|
||||
|
||||
/// \internal execute specified Operation
|
||||
template <typename Op, typename... Args>
|
||||
static Vc_INTRINSIC fixed_size_simd_mask<T, N> fromOperation(Op op, Args &&... args)
|
||||
{
|
||||
fixed_size_simd_mask<T, N> r = {
|
||||
storage_type0::fromOperation(op, Split::lo(args)...), // no forward here - it
|
||||
// could move and thus
|
||||
// break the next line
|
||||
storage_type1::fromOperation(op, Split::hi(std::forward<Args>(args))...)};
|
||||
return r;
|
||||
}
|
||||
|
||||
/// \internal
|
||||
Vc_INTRINSIC SimdMaskArray(storage_type0 &&x, storage_type1 &&y)
|
||||
: data0(std::move(x)), data1(std::move(y))
|
||||
{
|
||||
}
|
||||
|
||||
private:
|
||||
// The alignas attribute attached to the class declaration above is ignored by ICC
|
||||
// 17.0.0 (at least). So just move the alignas attribute down here where it works for
|
||||
// all compilers.
|
||||
alignas(static_cast<std::size_t>(
|
||||
Common::BoundedAlignment<Common::NextPowerOfTwo<N>::value * sizeof(V) /
|
||||
V::size()>::value)) storage_type0 data0;
|
||||
storage_type1 data1;
|
||||
};
|
||||
template <typename T, std::size_t N, typename V, std::size_t M>
|
||||
constexpr std::size_t SimdMaskArray<T, N, V, M>::Size;
|
||||
template <typename T, std::size_t N, typename V, std::size_t M>
|
||||
constexpr std::size_t SimdMaskArray<T, N, V, M>::MemoryAlignment;
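// Editor's sketch: an N that does not match a native vector width exercises
// the recursive split above, with Common::left_size<N>() elements in data0
// and the rest in data1 (e.g. 7 is commonly split as 4 + 3):
//
//   Vc::fixed_size_simd_mask<float, 7> k(true);
//   // k.count() == 7, computed as data0.count() + data1.count()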
|
||||
|
||||
///}}}1
|
||||
/// @}
|
||||
|
||||
} // namespace Vc
|
||||
|
||||
// XXX: this include should be in <Vc/vector.h>. But at least clang 3.4 then fails to compile the
|
||||
// code. Not sure yet what is going on, but it looks a lot like a bug in clang.
|
||||
#include "simd_cast_caller.tcc"
|
||||
|
||||
#endif // VC_COMMON_SIMDMASKARRAY_H_
|
||||
|
||||
// vim: foldmethod=marker
|
|
@ -0,0 +1,653 @@
|
|||
// -*- C++ -*-
|
||||
//===------------------------------ span ---------------------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is dual licensed under the MIT and the University of Illinois Open
|
||||
// Source Licenses. See LICENSE.TXT for details.
|
||||
//
|
||||
// Adapted for use with Vc:
|
||||
// Copyright © 2018 Matthias Kretz <kretz@kde.org>
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
#ifndef VC_COMMON_SPAN_H_
|
||||
#define VC_COMMON_SPAN_H_
|
||||
|
||||
#include <array> // for array
|
||||
#include <cstddef> // for ptrdiff_t and std::byte
|
||||
#include <iterator> // for iterators
|
||||
#include <type_traits> // for remove_cv, etc
|
||||
#include "subscript.h" // for AdaptSubscriptOperator
|
||||
|
||||
namespace Vc_VERSIONED_NAMESPACE
|
||||
{
|
||||
#ifdef __cpp_inline_variables
|
||||
inline
|
||||
#endif
|
||||
constexpr ptrdiff_t dynamic_extent = -1;
|
||||
namespace Common
|
||||
{
|
||||
template <typename T, ptrdiff_t Extent = dynamic_extent> class span;
|
||||
|
||||
template <typename T, ptrdiff_t Extent>
|
||||
constexpr auto begin(const span<T, Extent>& s) noexcept -> decltype(s.begin())
|
||||
{
|
||||
return s.begin();
|
||||
}
|
||||
template <typename T, ptrdiff_t Extent>
|
||||
constexpr auto end(const span<T, Extent>& s) noexcept -> decltype(s.end())
|
||||
{
|
||||
return s.end();
|
||||
}
|
||||
|
||||
template <class T> struct _is_span_impl : public std::false_type {
|
||||
};
|
||||
|
||||
template <class T, ptrdiff_t Extent>
|
||||
struct _is_span_impl<span<T, Extent>> : public std::true_type {
|
||||
};
|
||||
|
||||
template <class T>
|
||||
struct _is_span : public _is_span_impl<typename std::remove_cv<T>::type> {
|
||||
};
|
||||
|
||||
template <class T> struct _is_std_array_impl : public std::false_type {
|
||||
};
|
||||
|
||||
template <class T, size_t Sz>
|
||||
struct _is_std_array_impl<array<T, Sz>> : public std::true_type {
|
||||
};
|
||||
|
||||
template <class T>
|
||||
struct _is_std_array : public _is_std_array_impl<typename std::remove_cv<T>::type> {
|
||||
};
|
||||
|
||||
template <class T, class ElementType, class = void>
|
||||
struct _is_span_compatible_container : public std::false_type {
|
||||
};
|
||||
|
||||
template <class... Ts> using _void_t = void;
|
||||
|
||||
template <class C> constexpr auto _std_data(C& c) -> decltype(c.data())
|
||||
{
|
||||
return c.data();
|
||||
}
|
||||
template <class C> constexpr auto _std_data(const C& c) -> decltype(c.data())
|
||||
{
|
||||
return c.data();
|
||||
}
|
||||
template <class T, std::size_t N> constexpr T* _std_data(T (&array)[N]) noexcept
|
||||
{
|
||||
return array;
|
||||
}
|
||||
template <class E> constexpr const E* _std_data(std::initializer_list<E> il) noexcept
|
||||
{
|
||||
return il.begin();
|
||||
}
|
||||
|
||||
template <class C> constexpr auto _std_size(const C& c) -> decltype(c.size())
|
||||
{
|
||||
return c.size();
|
||||
}
|
||||
template <class T, std::size_t N>
|
||||
constexpr std::size_t _std_size(const T (&array)[N]) noexcept
|
||||
{
|
||||
return N;
|
||||
}
|
||||
|
||||
template <class T, class ElementType>
|
||||
struct _is_span_compatible_container<
|
||||
T, ElementType,
|
||||
_void_t<
|
||||
// is not a specialization of span
|
||||
typename std::enable_if<!_is_span<T>::value, std::nullptr_t>::type,
|
||||
// is not a specialization of array
|
||||
typename std::enable_if<!_is_std_array<T>::value, std::nullptr_t>::type,
|
||||
// is_array_v<Container> is false,
|
||||
typename std::enable_if<!std::is_array<T>::value, std::nullptr_t>::type,
|
||||
// data(cont) and size(cont) are well formed
|
||||
decltype(data(std::declval<T>())), decltype(size(std::declval<T>())),
|
||||
// remove_pointer_t<decltype(data(cont))>(*)[] is convertible to ElementType(*)[]
|
||||
typename std::enable_if<
|
||||
std::is_convertible<typename std::remove_pointer<decltype(
|
||||
data(std::declval<T&>()))>::type (*)[],
|
||||
ElementType (*)[]>::value,
|
||||
std::nullptr_t>::type>> : public std::true_type {
|
||||
};
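// Editor's illustration of what the trait above accepts: a container that is
// neither a span nor an array, whose data()/size() are found by unqualified
// lookup (ADL finds std::data/std::size only from C++17 on), and whose
// element pointer converts to ElementType(*)[]. Under those assumptions:
//
//   static_assert(_is_span_compatible_container<std::vector<int>, const int>::value, "");
//   static_assert(!_is_span_compatible_container<std::array<int, 4>, int>::value, "");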
|
||||
|
||||
#if defined Vc_MSVC || (defined Vc_GCC && Vc_GCC < 0x50100) || defined Vc_ICC || !defined __cpp_constexpr || __cpp_constexpr < 201304
|
||||
#define Vc_CONSTEXPR
|
||||
#else
|
||||
#define Vc_CONSTEXPR constexpr
|
||||
#endif
|
||||
|
||||
template <typename T, ptrdiff_t Extent> class span
|
||||
{
|
||||
public:
|
||||
// constants and types
|
||||
using element_type = T;
|
||||
using value_type = typename std::remove_cv<T>::type;
|
||||
using index_type = ptrdiff_t;
|
||||
using difference_type = ptrdiff_t;
|
||||
using pointer = T*;
|
||||
using const_pointer = const T*; // not in standard
|
||||
using reference = T&;
|
||||
using const_reference = const T&; // not in standard
|
||||
using iterator = pointer;
|
||||
using const_iterator = const_pointer;
|
||||
using reverse_iterator = std::reverse_iterator<iterator>;
|
||||
using const_reverse_iterator = std::reverse_iterator<const_iterator>;
|
||||
|
||||
static constexpr index_type extent = Extent;
|
||||
static_assert(Extent >= 0, "Can't have a span with an extent < 0");
|
||||
|
||||
// [span.cons], span constructors, copy, assignment, and destructor
|
||||
Vc_CONSTEXPR span() noexcept : data_{nullptr}
|
||||
{
|
||||
static_assert(Extent == 0,
|
||||
"Can't default construct a statically sized span with size > 0");
|
||||
}
|
||||
|
||||
Vc_CONSTEXPR span(const span&) noexcept = default;
|
||||
Vc_CONSTEXPR span& operator=(const span&) noexcept = default;
|
||||
|
||||
Vc_CONSTEXPR span(pointer _ptr, index_type _count) : data_{_ptr}
|
||||
{
|
||||
(void)_count;
|
||||
Vc_ASSERT(((void)"size mismatch in span's constructor (ptr, len)", Extent == _count));
|
||||
}
|
||||
Vc_CONSTEXPR span(pointer _f, pointer _l) : data_{_f}
|
||||
{
|
||||
(void)_l;
|
||||
Vc_ASSERT(((void)"size mismatch in span's constructor (ptr, ptr)",
|
||||
Extent == distance(_f, _l)));
|
||||
}
|
||||
|
||||
Vc_CONSTEXPR span(element_type (&_arr)[Extent]) noexcept : data_{_arr} {}
|
||||
Vc_CONSTEXPR span(array<value_type, Extent>& _arr) noexcept : data_{_arr.data()} {}
|
||||
Vc_CONSTEXPR span(const array<value_type, Extent>& _arr) noexcept : data_{_arr.data()} {}
|
||||
|
||||
template <class Container>
|
||||
inline Vc_CONSTEXPR span(
|
||||
Container& _c,
|
||||
typename std::enable_if<_is_span_compatible_container<Container, T>::value,
|
||||
std::nullptr_t>::type = nullptr)
|
||||
: data_{_std_data(_c)}
|
||||
{
|
||||
Vc_ASSERT(("size mismatch in span's constructor (container))",
|
||||
Extent == _std_size(_c)));
|
||||
}
|
||||
|
||||
template <class Container>
|
||||
inline Vc_CONSTEXPR span(
|
||||
const Container& _c,
|
||||
typename std::enable_if<_is_span_compatible_container<const Container, T>::value,
|
||||
std::nullptr_t>::type = nullptr)
|
||||
: data_{_std_data(_c)}
|
||||
{
|
||||
Vc_ASSERT(("size mismatch in span's constructor (const container)",
|
||||
Extent == _std_size(_c)));
|
||||
}
|
||||
|
||||
template <class OtherElementType>
|
||||
inline Vc_CONSTEXPR span(
|
||||
const span<OtherElementType, Extent>& _other,
|
||||
typename std::enable_if<
|
||||
std::is_convertible<OtherElementType (*)[], element_type (*)[]>::value,
|
||||
std::nullptr_t>::type = nullptr)
|
||||
: data_{_other.data()}
|
||||
{
|
||||
}
|
||||
|
||||
template <class OtherElementType>
|
||||
inline Vc_CONSTEXPR span(
|
||||
const span<OtherElementType, dynamic_extent>& _other,
|
||||
typename std::enable_if<
|
||||
std::is_convertible<OtherElementType (*)[], element_type (*)[]>::value,
|
||||
std::nullptr_t>::type = nullptr) noexcept
|
||||
: data_{_other.data()}
|
||||
{
|
||||
Vc_ASSERT(("size mismatch in span's constructor (other span)",
|
||||
Extent == _other.size()));
|
||||
}
|
||||
|
||||
// ~span() noexcept = default;
|
||||
|
||||
template <ptrdiff_t Count>
|
||||
inline Vc_CONSTEXPR span<element_type, Count> first() const noexcept
|
||||
{
|
||||
static_assert(Count >= 0, "Count must be >= 0 in span::first()");
|
||||
static_assert(Count <= Extent, "Count out of range in span::first()");
|
||||
return {data(), Count};
|
||||
}
|
||||
|
||||
template <ptrdiff_t Count>
|
||||
inline Vc_CONSTEXPR span<element_type, Count> last() const noexcept
|
||||
{
|
||||
static_assert(Count >= 0, "Count must be >= 0 in span::last()");
|
||||
static_assert(Count <= Extent, "Count out of range in span::last()");
|
||||
return {data() + size() - Count, Count};
|
||||
}
|
||||
|
||||
Vc_CONSTEXPR span<element_type, dynamic_extent> first(index_type _count) const noexcept
|
||||
{
|
||||
Vc_ASSERT(("Count out of range in span::first(count)",
|
||||
_count >= 0 && _count <= size()));
|
||||
return {data(), _count};
|
||||
}
|
||||
|
||||
Vc_CONSTEXPR span<element_type, dynamic_extent> last(index_type _count) const noexcept
|
||||
{
|
||||
Vc_ASSERT(
|
||||
("Count out of range in span::last(count)", _count >= 0 && _count <= size()));
|
||||
return {data() + size() - _count, _count};
|
||||
}
|
||||
|
||||
#ifndef Vc_MSVC
|
||||
// MSVC 190024215 fails with "error C2059: syntax error: '<end Parse>'" somewhere in
|
||||
// this file. Unless someone needs this function on MSVC, I don't see a reason to
|
||||
// invest time into working around their bugs.
|
||||
template <ptrdiff_t Offset, ptrdiff_t Count = dynamic_extent>
|
||||
inline Vc_CONSTEXPR auto subspan() const noexcept
|
||||
-> span<element_type, Count != dynamic_extent ? Count : Extent - Offset>
|
||||
{
|
||||
Vc_ASSERT(
|
||||
("Offset out of range in span::subspan()", Offset >= 0 && Offset <= size()));
|
||||
return {data() + Offset, Count == dynamic_extent ? size() - Offset : Count};
|
||||
}
|
||||
|
||||
inline Vc_CONSTEXPR span<element_type, dynamic_extent> subspan(
|
||||
index_type offset, index_type count = dynamic_extent) const noexcept
|
||||
{
|
||||
Vc_ASSERT(("Offset out of range in span::subspan(offset, count)",
|
||||
offset >= 0 && offset <= size()));
|
||||
Vc_ASSERT(("Count out of range in span::subspan(offset, count)",
|
||||
(count >= 0 && count <= size()) || count == dynamic_extent));
|
||||
if (count == dynamic_extent) {
|
||||
return {data() + offset, size() - offset};
|
||||
}
|
||||
Vc_ASSERT(("count + offset out of range in span::subspan(offset, count)",
|
||||
offset + count <= size()));
|
||||
return {data() + offset, count};
|
||||
}
|
||||
#endif // Vc_MSVC
|
||||
|
||||
Vc_CONSTEXPR index_type size() const noexcept { return Extent; }
|
||||
Vc_CONSTEXPR index_type size_bytes() const noexcept
|
||||
{
|
||||
return Extent * sizeof(element_type);
|
||||
}
|
||||
Vc_CONSTEXPR bool empty() const noexcept { return Extent == 0; }
|
||||
|
||||
Vc_CONSTEXPR reference operator[](index_type _idx) const noexcept
|
||||
{
|
||||
Vc_ASSERT(("span<T,N>[] index out of bounds", _idx >= 0 && _idx < size()));
|
||||
return data_[_idx];
|
||||
}
|
||||
|
||||
Vc_CONSTEXPR reference operator()(index_type _idx) const noexcept
|
||||
{
|
||||
Vc_ASSERT(("span<T,N>() index out of bounds", _idx >= 0 && _idx < size()));
|
||||
return data_[_idx];
|
||||
}
|
||||
|
||||
Vc_CONSTEXPR pointer data() const noexcept { return data_; }
|
||||
|
||||
// [span.iter], span iterator support
|
||||
Vc_CONSTEXPR iterator begin() const noexcept { return iterator(data()); }
|
||||
Vc_CONSTEXPR iterator end() const noexcept { return iterator(data() + size()); }
|
||||
Vc_CONSTEXPR const_iterator cbegin() const noexcept { return const_iterator(data()); }
|
||||
Vc_CONSTEXPR const_iterator cend() const noexcept
|
||||
{
|
||||
return const_iterator(data() + size());
|
||||
}
|
||||
Vc_CONSTEXPR reverse_iterator rbegin() const noexcept { return reverse_iterator(end()); }
|
||||
Vc_CONSTEXPR reverse_iterator rend() const noexcept { return reverse_iterator(begin()); }
|
||||
Vc_CONSTEXPR const_reverse_iterator crbegin() const noexcept
|
||||
{
|
||||
return const_reverse_iterator(cend());
|
||||
}
|
||||
Vc_CONSTEXPR const_reverse_iterator crend() const noexcept
|
||||
{
|
||||
return const_reverse_iterator(cbegin());
|
||||
}
|
||||
|
||||
Vc_CONSTEXPR void swap(span& _other) noexcept
|
||||
{
|
||||
pointer _p = data_;
|
||||
data_ = _other.data_;
|
||||
_other.data_ = _p;
|
||||
}
|
||||
|
||||
#ifdef __cpp_lib_byte
|
||||
span<const std::byte, Extent * sizeof(element_type)> _as_bytes() const noexcept
|
||||
{
|
||||
return {reinterpret_cast<const std::byte*>(data()), size_bytes()};
|
||||
}
|
||||
|
||||
span<std::byte, Extent * sizeof(element_type)> _as_writeable_bytes() const noexcept
|
||||
{
|
||||
return {reinterpret_cast<std::byte*>(data()), size_bytes()};
|
||||
}
|
||||
#endif // __cpp_lib_byte
|
||||
|
||||
private:
|
||||
pointer data_;
|
||||
};
|
||||
|
||||
template <typename T> class span<T, dynamic_extent>
|
||||
{
|
||||
private:
|
||||
public:
|
||||
// constants and types
|
||||
using element_type = T;
|
||||
using value_type = typename std::remove_cv<T>::type;
|
||||
using index_type = ptrdiff_t;
|
||||
using difference_type = ptrdiff_t;
|
||||
using pointer = T*;
|
||||
using const_pointer = const T*; // not in standard
|
||||
using reference = T&;
|
||||
using const_reference = const T&; // not in standard
|
||||
using iterator = pointer;
|
||||
using const_iterator = const_pointer;
|
||||
using reverse_iterator = std::reverse_iterator<iterator>;
|
||||
using const_reverse_iterator = std::reverse_iterator<const_iterator>;
|
||||
|
||||
static constexpr index_type extent = dynamic_extent;
|
||||
|
||||
// [span.cons], span constructors, copy, assignment, and destructor
|
||||
Vc_CONSTEXPR span() noexcept : data_{nullptr}, size_{0} {}
|
||||
|
||||
Vc_CONSTEXPR span(const span&) noexcept = default;
|
||||
Vc_CONSTEXPR span& operator=(const span&) noexcept = default;
|
||||
|
||||
Vc_CONSTEXPR span(pointer _ptr, index_type _count) : data_{_ptr}, size_{_count} {}
|
||||
Vc_CONSTEXPR span(pointer _f, pointer _l) : data_{_f}, size_{distance(_f, _l)} {}
|
||||
|
||||
template <size_t Sz>
|
||||
inline Vc_CONSTEXPR span(element_type (&_arr)[Sz]) noexcept : data_{_arr}, size_{Sz}
|
||||
{
|
||||
}
|
||||
|
||||
template <size_t Sz>
|
||||
inline Vc_CONSTEXPR span(array<value_type, Sz>& _arr) noexcept
|
||||
: data_{_arr.data()}, size_{Sz}
|
||||
{
|
||||
}
|
||||
|
||||
template <size_t Sz>
|
||||
inline Vc_CONSTEXPR span(const array<value_type, Sz>& _arr) noexcept
|
||||
: data_{_arr.data()}, size_{Sz}
|
||||
{
|
||||
}
|
||||
|
||||
template <class Container>
|
||||
inline Vc_CONSTEXPR span(
|
||||
Container& _c,
|
||||
typename std::enable_if<_is_span_compatible_container<Container, T>::value,
|
||||
std::nullptr_t>::type = nullptr)
|
||||
: data_{_std_data(_c)}, size_{index_type(_std_size(_c))}
|
||||
{
|
||||
}
|
||||
|
||||
template <class Container>
|
||||
inline Vc_CONSTEXPR span(
|
||||
const Container& _c,
|
||||
typename std::enable_if<_is_span_compatible_container<const Container, T>::value,
|
||||
std::nullptr_t>::type = nullptr)
|
||||
: data_{_std_data(_c)}, size_{index_type(_std_size(_c))}
|
||||
{
|
||||
}
|
||||
|
||||
template <class OtherElementType, ptrdiff_t OtherExtent>
|
||||
inline Vc_CONSTEXPR span(
|
||||
const span<OtherElementType, OtherExtent>& _other,
|
||||
typename std::enable_if<
|
||||
std::is_convertible<OtherElementType (*)[], element_type (*)[]>::value,
|
||||
std::nullptr_t>::type = nullptr) noexcept
|
||||
: data_{_other.data()}, size_{_other.size()}
|
||||
{
|
||||
}
|
||||
|
||||
// ~span() noexcept = default;
|
||||
|
||||
template <ptrdiff_t Count>
|
||||
inline Vc_CONSTEXPR span<element_type, Count> first() const noexcept
|
||||
{
|
||||
static_assert(Count >= 0, "");
|
||||
Vc_ASSERT(("Count out of range in span::first()", Count <= size()));
|
||||
return {data(), Count};
|
||||
}
|
||||
|
||||
template <ptrdiff_t Count>
|
||||
inline Vc_CONSTEXPR span<element_type, Count> last() const noexcept
|
||||
{
|
||||
static_assert(Count >= 0, "");
|
||||
Vc_ASSERT(("Count out of range in span::last()", Count <= size()));
|
||||
return {data() + size() - Count, Count};
|
||||
}
|
||||
|
||||
Vc_CONSTEXPR span<element_type, dynamic_extent> first(index_type _count) const noexcept
|
||||
{
|
||||
Vc_ASSERT(("Count out of range in span::first(count)",
|
||||
_count >= 0 && _count <= size()));
|
||||
return {data(), _count};
|
||||
}
|
||||
|
||||
Vc_CONSTEXPR span<element_type, dynamic_extent> last(index_type _count) const noexcept
|
||||
{
|
||||
Vc_ASSERT(
|
||||
("Count out of range in span::last(count)", _count >= 0 && _count <= size()));
|
||||
return {data() + size() - _count, _count};
|
||||
}
|
||||
|
||||
template <ptrdiff_t Offset, ptrdiff_t Count = dynamic_extent>
|
||||
inline Vc_CONSTEXPR span<T, dynamic_extent> subspan() const noexcept
|
||||
{
|
||||
Vc_ASSERT(
|
||||
("Offset out of range in span::subspan()", Offset >= 0 && Offset <= size()));
|
||||
Vc_ASSERT(("Count out of range in span::subspan()",
|
||||
Count == dynamic_extent || Offset + Count <= size()));
|
||||
return {data() + Offset, Count == dynamic_extent ? size() - Offset : Count};
|
||||
}
|
||||
|
||||
Vc_CONSTEXPR span<element_type, dynamic_extent> inline subspan(
|
||||
index_type _offset, index_type _count = dynamic_extent) const noexcept
|
||||
{
|
||||
Vc_ASSERT(("Offset out of range in span::subspan(offset, count)",
|
||||
_offset >= 0 && _offset <= size()));
|
||||
Vc_ASSERT(("count out of range in span::subspan(offset, count)",
|
||||
(_count >= 0 && _count <= size()) || _count == dynamic_extent));
|
||||
if (_count == dynamic_extent)
|
||||
return {data() + _offset, size() - _offset};
|
||||
Vc_ASSERT(("Offset + count out of range in span::subspan(offset, count)",
|
||||
_offset + _count <= size()));
|
||||
return {data() + _offset, _count};
|
||||
}
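// Worked example (editor's sketch): for a dynamic span s viewing {10, 20, 30,
// 40}, s.subspan(1) views {20, 30, 40} (the count defaults to the remainder)
// and s.subspan(1, 2) views {20, 30}.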
|
||||
|
||||
Vc_CONSTEXPR index_type size() const noexcept { return size_; }
|
||||
Vc_CONSTEXPR index_type size_bytes() const noexcept
|
||||
{
|
||||
return size_ * sizeof(element_type);
|
||||
}
|
||||
Vc_CONSTEXPR bool empty() const noexcept { return size_ == 0; }
|
||||
|
||||
Vc_CONSTEXPR reference operator[](index_type _idx) const noexcept
|
||||
{
|
||||
Vc_ASSERT(("span<T>[] index out of bounds", _idx >= 0 && _idx < size()));
|
||||
return data_[_idx];
|
||||
}
|
||||
|
||||
Vc_CONSTEXPR reference operator()(index_type _idx) const noexcept
|
||||
{
|
||||
Vc_ASSERT(("span<T>() index out of bounds", _idx >= 0 && _idx < size()));
|
||||
return data_[_idx];
|
||||
}
|
||||
|
||||
Vc_CONSTEXPR pointer data() const noexcept { return data_; }
|
||||
|
||||
// [span.iter], span iterator support
|
||||
Vc_CONSTEXPR iterator begin() const noexcept { return iterator(data()); }
|
||||
Vc_CONSTEXPR iterator end() const noexcept { return iterator(data() + size()); }
|
||||
Vc_CONSTEXPR const_iterator cbegin() const noexcept { return const_iterator(data()); }
|
||||
Vc_CONSTEXPR const_iterator cend() const noexcept
|
||||
{
|
||||
return const_iterator(data() + size());
|
||||
}
|
||||
Vc_CONSTEXPR reverse_iterator rbegin() const noexcept { return reverse_iterator(end()); }
|
||||
Vc_CONSTEXPR reverse_iterator rend() const noexcept { return reverse_iterator(begin()); }
|
||||
Vc_CONSTEXPR const_reverse_iterator crbegin() const noexcept
|
||||
{
|
||||
return const_reverse_iterator(cend());
|
||||
}
|
||||
Vc_CONSTEXPR const_reverse_iterator crend() const noexcept
|
||||
{
|
||||
return const_reverse_iterator(cbegin());
|
||||
}
|
||||
|
||||
Vc_CONSTEXPR void swap(span& _other) noexcept
|
||||
{
|
||||
pointer _p = data_;
|
||||
data_ = _other.data_;
|
||||
_other.data_ = _p;
|
||||
|
||||
index_type _sz = size_;
|
||||
size_ = _other.size_;
|
||||
_other.size_ = _sz;
|
||||
}
|
||||
|
||||
#ifdef __cpp_lib_byte
|
||||
// Disable _as_bytes() for older MSVC versions as it leads to a compilation error due to a compiler bug.
|
||||
// When parsing the return type, MSVC will instantiate the primary template of span<> and trip its static_assert().
|
||||
#if _MSC_VER > 1928
|
||||
span<const std::byte, dynamic_extent> _as_bytes() const noexcept
|
||||
{
|
||||
return {reinterpret_cast<const std::byte*>(data()), size_bytes()};
|
||||
}
|
||||
|
||||
span<std::byte, dynamic_extent> _as_writeable_bytes() const noexcept
|
||||
{
|
||||
return {reinterpret_cast<std::byte*>(data()), size_bytes()};
|
||||
}
|
||||
#endif
|
||||
#endif // __cpp_lib_byte
|
||||
|
||||
private:
|
||||
pointer data_;
|
||||
index_type size_;
|
||||
};
|
||||
|
||||
template <class T1, ptrdiff_t Extent1, class T2, ptrdiff_t Extent2>
|
||||
Vc_CONSTEXPR bool operator==(const span<T1, Extent1>& lhs, const span<T2, Extent2>& rhs)
|
||||
{
|
||||
return equal(lhs.begin(), lhs.end(), rhs.begin(), rhs.end());
|
||||
}
|
||||
|
||||
template <class T1, ptrdiff_t Extent1, class T2, ptrdiff_t Extent2>
|
||||
Vc_CONSTEXPR bool operator!=(const span<T1, Extent1>& lhs, const span<T2, Extent2>& rhs)
|
||||
{
|
||||
return !(rhs == lhs);
|
||||
}
|
||||
|
||||
template <class T1, ptrdiff_t Extent1, class T2, ptrdiff_t Extent2>
|
||||
Vc_CONSTEXPR bool operator<(const span<T1, Extent1>& lhs, const span<T2, Extent2>& rhs)
|
||||
{
|
||||
return lexicographical_compare(lhs.begin(), lhs.end(), rhs.begin(), rhs.end());
|
||||
}
|
||||
|
||||
template <class T1, ptrdiff_t Extent1, class T2, ptrdiff_t Extent2>
|
||||
Vc_CONSTEXPR bool operator<=(const span<T1, Extent1>& lhs, const span<T2, Extent2>& rhs)
|
||||
{
|
||||
return !(rhs < lhs);
|
||||
}
|
||||
|
||||
template <class T1, ptrdiff_t Extent1, class T2, ptrdiff_t Extent2>
|
||||
Vc_CONSTEXPR bool operator>(const span<T1, Extent1>& lhs, const span<T2, Extent2>& rhs)
|
||||
{
|
||||
return rhs < lhs;
|
||||
}
|
||||
|
||||
template <class T1, ptrdiff_t Extent1, class T2, ptrdiff_t Extent2>
|
||||
Vc_CONSTEXPR bool operator>=(const span<T1, Extent1>& lhs, const span<T2, Extent2>& rhs)
|
||||
{
|
||||
return !(lhs < rhs);
|
||||
}
|
||||
|
||||
// as_bytes & as_writeable_bytes
|
||||
template <class T, ptrdiff_t Extent>
|
||||
auto as_bytes(span<T, Extent> _s) noexcept -> decltype(_s._as_bytes())
|
||||
{
|
||||
return _s._as_bytes();
|
||||
}
|
||||
|
||||
template <class T, ptrdiff_t Extent>
|
||||
auto as_writeable_bytes(span<T, Extent> _s) noexcept ->
|
||||
typename std::enable_if<!std::is_const<T>::value,
|
||||
decltype(_s._as_writeable_bytes())>::type
|
||||
{
|
||||
return _s._as_writeable_bytes();
|
||||
}
|
||||
|
||||
template <class T, ptrdiff_t Extent>
|
||||
Vc_CONSTEXPR void swap(span<T, Extent>& lhs, span<T, Extent>& rhs) noexcept
|
||||
{
|
||||
lhs.swap(rhs);
|
||||
}
|
||||
|
||||
#undef Vc_CONSTEXPR
|
||||
|
||||
// Deduction guides
|
||||
#ifdef __cpp_deduction_guides
|
||||
template <class T, size_t Sz> span(T (&)[Sz])->span<T, Sz>;
|
||||
|
||||
template <class T, size_t Sz> span(array<T, Sz>&)->span<T, Sz>;
|
||||
|
||||
template <class T, size_t Sz> span(const array<T, Sz>&)->span<const T, Sz>;
|
||||
|
||||
template <class Container> span(Container&)->span<typename Container::value_type>;
|
||||
|
||||
template <class Container>
|
||||
span(const Container&)->span<const typename Container::value_type>;
|
||||
#endif // __cpp_deduction_guides
|
||||
|
||||
} // namespace Common
|
||||
|
||||
/**
|
||||
* \ingroup Containers
|
||||
* \headerfile span.h <Vc/span>
|
||||
*
|
||||
* An adapted `std::span` with additional subscript operators supporting gather and scatter operations.
|
||||
*
|
||||
* The [std::span](https://en.cppreference.com/w/cpp/container/span) documentation applies.
|
||||
*
|
||||
* Example:
|
||||
* \code
|
||||
* struct Point {
|
||||
* float x, y;
|
||||
* };
|
||||
* Point data[100];
|
||||
* // initialize values in data
|
||||
*
|
||||
* Vc::span<Point, 100> view(data);
|
||||
* float_v::IndexType indexes = ...; // values in the range 0 to 99
|
||||
* float_v x = view[indexes][&Point::x];
|
||||
* float_v y = view[indexes][&Point::y];
|
||||
* \endcode
|
||||
*/
|
||||
template <typename T, ptrdiff_t Extent = dynamic_extent>
|
||||
using span = Common::AdaptSubscriptOperator<Common::span<T, Extent>>;
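// A minimal usage sketch (editor's illustration); the gather/scatter form is
// shown in the documentation block above, and plain scalar subscripts keep
// working through the adaptor:
//
//   float storage[8] = {};
//   Vc::span<float, 8> s(storage);
//   s[0] = 1.f;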
|
||||
|
||||
namespace Traits
|
||||
{
|
||||
template <typename T, ptrdiff_t Extent>
|
||||
struct has_contiguous_storage_impl<Vc::span<T, Extent>> : public std::true_type {
|
||||
};
|
||||
template <typename T, ptrdiff_t Extent>
|
||||
struct has_contiguous_storage_impl<Vc::Common::span<T, Extent>> : public std::true_type {
|
||||
};
|
||||
} // namespace Traits
|
||||
|
||||
} // namespace Vc_VERSIONED_NAMESPACE
|
||||
|
||||
#endif // VC_COMMON_SPAN_H_
|
|
@ -0,0 +1,381 @@
|
|||
/* This file is part of the Vc library. {{{
|
||||
Copyright © 2010-2015 Matthias Kretz <kretz@kde.org>
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the names of contributing organizations nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
}}}*/
|
||||
|
||||
#ifndef VC_COMMON_STORAGE_H_
|
||||
#define VC_COMMON_STORAGE_H_
|
||||
|
||||
#include "aliasingentryhelper.h"
|
||||
#include "types.h"
|
||||
#include "maskbool.h"
|
||||
#ifdef Vc_IMPL_AVX
|
||||
#include "../avx/intrinsics.h"
|
||||
#endif
|
||||
#include "macros.h"
|
||||
|
||||
namespace Vc_VERSIONED_NAMESPACE
|
||||
{
|
||||
namespace Detail
|
||||
{
|
||||
template <typename V> inline V zero();
|
||||
} // namespace Detail
|
||||
namespace Common
|
||||
{
|
||||
namespace Detail
|
||||
{
|
||||
#ifdef Vc_IMPL_AVX
|
||||
template <typename ValueType, size_t Size> struct IntrinsicType {
|
||||
using type = typename std::conditional<
|
||||
std::is_integral<ValueType>::value,
|
||||
typename std::conditional<sizeof(ValueType) * Size == 16, __m128i, __m256i>::type,
|
||||
typename std::conditional<
|
||||
std::is_same<ValueType, double>::value,
|
||||
typename std::conditional<sizeof(ValueType) * Size == 16, __m128d,
|
||||
__m256d>::type,
|
||||
typename std::conditional<sizeof(ValueType) * Size == 16, __m128,
|
||||
__m256>::type>::type>::type;
|
||||
};
|
||||
#elif defined Vc_IMPL_SSE
|
||||
template <typename ValueType, size_t Size> struct IntrinsicType {
|
||||
using type = typename std::conditional<
|
||||
std::is_integral<ValueType>::value, __m128i,
|
||||
typename std::conditional<std::is_same<ValueType, double>::value, __m128d,
|
||||
__m128>::type>::type;
|
||||
};
|
||||
#else
|
||||
template <typename ValueType, size_t Size> struct IntrinsicType {
|
||||
static_assert(Size == 1,
|
||||
"IntrinsicType without SIMD target support may only have Size = 1");
|
||||
using type = ValueType;
|
||||
};
|
||||
#endif
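// Editor's illustration of the mapping above (the result depends on the
// build): under Vc_IMPL_AVX, IntrinsicType<float, 8>::type is __m256 and
// IntrinsicType<float, 4>::type is __m128, since 4 * sizeof(float) == 16;
// IntrinsicType<int, 4>::type is __m128i. Under Vc_IMPL_SSE everything maps
// into the 16-byte __m128/__m128d/__m128i family.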
|
||||
template <typename ValueType, size_t Size, size_t Bytes = sizeof(ValueType) * Size>
|
||||
struct BuiltinType;
|
||||
#ifdef Vc_USE_BUILTIN_VECTOR_TYPES
|
||||
#define Vc_VECBUILTIN __attribute__((__vector_size__(16)))
|
||||
template <size_t Size> struct BuiltinType< double , Size, 16> { typedef double type Vc_VECBUILTIN; };
|
||||
template <size_t Size> struct BuiltinType< float , Size, 16> { typedef float type Vc_VECBUILTIN; };
|
||||
template <size_t Size> struct BuiltinType< long long, Size, 16> { typedef long long type Vc_VECBUILTIN; };
|
||||
template <size_t Size> struct BuiltinType<unsigned long long, Size, 16> { typedef unsigned long long type Vc_VECBUILTIN; };
|
||||
template <size_t Size> struct BuiltinType< long , Size, 16> { typedef long type Vc_VECBUILTIN; };
|
||||
template <size_t Size> struct BuiltinType<unsigned long , Size, 16> { typedef unsigned long type Vc_VECBUILTIN; };
|
||||
template <size_t Size> struct BuiltinType< int , Size, 16> { typedef int type Vc_VECBUILTIN; };
|
||||
template <size_t Size> struct BuiltinType<unsigned int , Size, 16> { typedef unsigned int type Vc_VECBUILTIN; };
|
||||
template <size_t Size> struct BuiltinType< short , Size, 16> { typedef short type Vc_VECBUILTIN; };
|
||||
template <size_t Size> struct BuiltinType<unsigned short , Size, 16> { typedef unsigned short type Vc_VECBUILTIN; };
|
||||
template <size_t Size> struct BuiltinType< char , Size, 16> { typedef char type Vc_VECBUILTIN; };
|
||||
template <size_t Size> struct BuiltinType<unsigned char , Size, 16> { typedef unsigned char type Vc_VECBUILTIN; };
|
||||
template <size_t Size> struct BuiltinType< signed char , Size, 16> { typedef signed char type Vc_VECBUILTIN; };
|
||||
template <size_t Size> struct BuiltinType< bool , Size, 16> { typedef unsigned char type Vc_VECBUILTIN; };
|
||||
#undef Vc_VECBUILTIN
|
||||
#define Vc_VECBUILTIN __attribute__((__vector_size__(32)))
|
||||
template <size_t Size> struct BuiltinType< double , Size, 32> { typedef double type Vc_VECBUILTIN; };
|
||||
template <size_t Size> struct BuiltinType< float , Size, 32> { typedef float type Vc_VECBUILTIN; };
|
||||
template <size_t Size> struct BuiltinType< long long, Size, 32> { typedef long long type Vc_VECBUILTIN; };
|
||||
template <size_t Size> struct BuiltinType<unsigned long long, Size, 32> { typedef unsigned long long type Vc_VECBUILTIN; };
|
||||
template <size_t Size> struct BuiltinType< long , Size, 32> { typedef long type Vc_VECBUILTIN; };
|
||||
template <size_t Size> struct BuiltinType<unsigned long , Size, 32> { typedef unsigned long type Vc_VECBUILTIN; };
|
||||
template <size_t Size> struct BuiltinType< int , Size, 32> { typedef int type Vc_VECBUILTIN; };
|
||||
template <size_t Size> struct BuiltinType<unsigned int , Size, 32> { typedef unsigned int type Vc_VECBUILTIN; };
|
||||
template <size_t Size> struct BuiltinType< short , Size, 32> { typedef short type Vc_VECBUILTIN; };
|
||||
template <size_t Size> struct BuiltinType<unsigned short , Size, 32> { typedef unsigned short type Vc_VECBUILTIN; };
|
||||
template <size_t Size> struct BuiltinType< char , Size, 32> { typedef char type Vc_VECBUILTIN; };
|
||||
template <size_t Size> struct BuiltinType<unsigned char , Size, 32> { typedef unsigned char type Vc_VECBUILTIN; };
|
||||
template <size_t Size> struct BuiltinType< signed char , Size, 32> { typedef signed char type Vc_VECBUILTIN; };
|
||||
template <size_t Size> struct BuiltinType< bool , Size, 32> { typedef unsigned char type Vc_VECBUILTIN; };
|
||||
#undef Vc_VECBUILTIN
|
||||
#endif
|
||||
} // namespace Detail
|
||||
|
||||
template <typename ValueType, size_t Size>
|
||||
using IntrinsicType = typename Detail::IntrinsicType<ValueType, Size>::type;
|
||||
|
||||
template <typename ValueType, size_t Size>
|
||||
using BuiltinType = typename Detail::BuiltinType<ValueType, Size>::type;
|
||||
|
||||
namespace AliasStrategy
|
||||
{
|
||||
struct Union {};
|
||||
struct MayAlias {};
|
||||
struct VectorBuiltin {};
|
||||
struct UnionMembers {};
|
||||
} // namespace AliasStrategy
|
||||
|
||||
using DefaultStrategy =
|
||||
#if defined Vc_USE_BUILTIN_VECTOR_TYPES
|
||||
AliasStrategy::VectorBuiltin;
|
||||
#elif defined Vc_MSVC
|
||||
AliasStrategy::UnionMembers;
|
||||
#elif defined Vc_ICC
|
||||
AliasStrategy::Union;
|
||||
#elif defined __GNUC__
|
||||
AliasStrategy::MayAlias;
|
||||
#else
|
||||
AliasStrategy::Union;
|
||||
#endif
|
||||
|
||||
template <typename ValueType, size_t Size, typename Strategy = DefaultStrategy>
|
||||
class Storage;
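// Editor's note: the strategies exist because reading one scalar lane out of
// an intrinsic vector is type punning, and each compiler sanctions different
// mechanisms for it. A miniature of the union strategy (some_vector is a
// hypothetical __m128 value):
//
//   union { __m128 v; float m[4]; } a = {some_vector};
//   float third = a.m[2];  // lane read blessed via the union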
|
||||
|
||||
// GCC 6 forbids `EntryType m[]` altogether
|
||||
template <typename ValueType, size_t Size>
|
||||
class Storage<ValueType, Size, AliasStrategy::Union>
|
||||
{
|
||||
static_assert(std::is_fundamental<ValueType>::value &&
|
||||
std::is_arithmetic<ValueType>::value,
|
||||
"Only works for fundamental arithmetic types.");
|
||||
|
||||
public:
|
||||
using VectorType = IntrinsicType<ValueType, Size>;
|
||||
using EntryType = ValueType;
|
||||
|
||||
union Alias {
|
||||
Vc_INTRINSIC Alias(VectorType vv) : v(vv) {}
|
||||
VectorType v;
|
||||
EntryType m[Size];
|
||||
};
|
||||
|
||||
Vc_INTRINSIC Storage() : data(Vc::Detail::zero<VectorType>()) {}
|
||||
Vc_INTRINSIC Storage(const VectorType &x) : data(x) { assertCorrectAlignment(&data); }
|
||||
template <typename U>
|
||||
Vc_INTRINSIC explicit Storage(const U &x,
|
||||
enable_if<sizeof(U) == sizeof(VectorType)> = nullarg)
|
||||
: data(reinterpret_cast<VectorType>(x))
|
||||
{
|
||||
assertCorrectAlignment(&data);
|
||||
}
|
||||
|
||||
Vc_INTRINSIC Storage(const Storage &) = default;
|
||||
Vc_INTRINSIC Storage &operator=(const Storage &) = default;
|
||||
|
||||
Vc_INTRINSIC operator const VectorType &() const { return data; }
|
||||
Vc_INTRINSIC Vc_PURE VectorType &v() { return data; }
|
||||
Vc_INTRINSIC Vc_PURE const VectorType &v() const { return data; }
|
||||
Vc_INTRINSIC Vc_PURE EntryType m(size_t i) const { return Alias(data).m[i]; }
|
||||
Vc_INTRINSIC void set(size_t i, EntryType x)
|
||||
{
|
||||
Alias a(data);
|
||||
a.m[i] = x;
|
||||
data = a.v;
|
||||
}
|
||||
|
||||
private:
|
||||
VectorType data;
|
||||
};
|
||||
|
||||
template <typename ValueType, size_t Size>
|
||||
class Storage<ValueType, Size, AliasStrategy::MayAlias>
|
||||
{
|
||||
static_assert(std::is_fundamental<ValueType>::value &&
|
||||
std::is_arithmetic<ValueType>::value,
|
||||
"Only works for fundamental arithmetic types.");
|
||||
|
||||
public:
|
||||
using VectorType = IntrinsicType<ValueType, Size>;
|
||||
using EntryType = ValueType;
|
||||
|
||||
Vc_INTRINSIC Storage() : data() { assertCorrectAlignment(&data); }
|
||||
Vc_INTRINSIC Storage(const VectorType &x) : data(x)
|
||||
{
|
||||
assertCorrectAlignment(&data);
|
||||
}
|
||||
template <typename U>
|
||||
Vc_INTRINSIC explicit Storage(const U &x,
|
||||
enable_if<sizeof(U) == sizeof(VectorType)> = nullarg)
|
||||
: data(reinterpret_cast<const VectorType &>(x))
|
||||
{
|
||||
assertCorrectAlignment(&data);
|
||||
}
|
||||
Vc_INTRINSIC Storage &operator=(const VectorType &x)
|
||||
{
|
||||
data = x;
|
||||
return *this;
|
||||
}
|
||||
|
||||
Vc_INTRINSIC Storage(const Storage &) = default;
|
||||
Vc_INTRINSIC Storage &operator=(const Storage &) = default;
|
||||
|
||||
Vc_INTRINSIC operator const VectorType &() const { return v(); }
|
||||
Vc_INTRINSIC Vc_PURE VectorType &v() { return data; }
|
||||
Vc_INTRINSIC Vc_PURE const VectorType &v() const { return data; }
|
||||
|
||||
Vc_INTRINSIC Vc_PURE EntryType m(size_t i) const
|
||||
{
|
||||
return aliasing_cast<EntryType>(&data)[i];
|
||||
}
|
||||
Vc_INTRINSIC void set(size_t i, EntryType x)
|
||||
{
|
||||
aliasing_cast<EntryType>(&data)[i] = x;
|
||||
}
|
||||
|
||||
private:
|
||||
VectorType data;
|
||||
};
|
||||
|
||||
template <typename ValueType, size_t Size>
|
||||
class Storage<ValueType, Size, AliasStrategy::VectorBuiltin>
|
||||
{
|
||||
static_assert(std::is_fundamental<ValueType>::value &&
|
||||
std::is_arithmetic<ValueType>::value,
|
||||
"Only works for fundamental arithmetic types.");
|
||||
|
||||
using Builtin = BuiltinType<ValueType, Size>;
|
||||
|
||||
public:
|
||||
using VectorType =
|
||||
#ifdef Vc_TEMPLATES_DROP_ATTRIBUTES
|
||||
MayAlias<IntrinsicType<ValueType, Size>>;
|
||||
#else
|
||||
IntrinsicType<ValueType, Size>;
|
||||
#endif
|
||||
using EntryType = ValueType;
|
||||
|
||||
Vc_INTRINSIC Storage() : data() { assertCorrectAlignment(&data); }
|
||||
Vc_INTRINSIC Storage(const Storage &) = default;
|
||||
Vc_INTRINSIC Storage &operator=(const Storage &) = default;
|
||||
|
||||
Vc_INTRINSIC Storage(const VectorType &x)
|
||||
: data(aliasing_cast<Builtin>(x))
|
||||
{
|
||||
assertCorrectAlignment(&data);
|
||||
}
|
||||
template <typename U>
|
||||
Vc_INTRINSIC explicit Storage(const U &x,
|
||||
enable_if<sizeof(U) == sizeof(VectorType)> = nullarg)
|
||||
: data(aliasing_cast<Builtin>(x))
|
||||
{
|
||||
assertCorrectAlignment(&data);
|
||||
}
|
||||
Vc_INTRINSIC Storage &operator=(const VectorType &x)
|
||||
{
|
||||
data = aliasing_cast<Builtin>(x);
|
||||
return *this;
|
||||
}
|
||||
|
||||
Vc_INTRINSIC operator const VectorType &() const { return v(); }
|
||||
Vc_INTRINSIC Vc_PURE VectorType &v() { return reinterpret_cast<VectorType &>(data); }
|
||||
Vc_INTRINSIC Vc_PURE const VectorType &v() const { return reinterpret_cast<const VectorType &>(data); }
|
||||
|
||||
Vc_INTRINSIC Vc_PURE EntryType m(size_t i) const { return data[i]; }
|
||||
Vc_INTRINSIC void set(size_t i, EntryType x) { data[i] = x; }
|
||||
|
||||
Vc_INTRINSIC Builtin &builtin() { return data; }
|
||||
Vc_INTRINSIC const Builtin &builtin() const { return data; }
|
||||
|
||||
private:
|
||||
Builtin data;
|
||||
};
|
||||
|
||||
template <typename ValueType, size_t Size>
|
||||
class Storage<ValueType, Size, AliasStrategy::UnionMembers>
|
||||
{
|
||||
static_assert(std::is_fundamental<ValueType>::value &&
|
||||
std::is_arithmetic<ValueType>::value,
|
||||
"Only works for fundamental arithmetic types.");
|
||||
|
||||
public:
|
||||
using VectorType = IntrinsicType<ValueType, Size>;
|
||||
using EntryType = ValueType;
|
||||
|
||||
Vc_INTRINSIC Storage() : data() { assertCorrectAlignment(&data); }
|
||||
Vc_INTRINSIC Storage(const VectorType &x) : data(x)
|
||||
{
|
||||
assertCorrectAlignment(&data);
|
||||
}
|
||||
template <typename U>
|
||||
Vc_INTRINSIC explicit Storage(const U &x,
|
||||
enable_if<sizeof(U) == sizeof(VectorType)> = nullarg)
|
||||
: data(reinterpret_cast<const VectorType &>(x))
|
||||
{
|
||||
assertCorrectAlignment(&data);
|
||||
}
|
||||
Vc_INTRINSIC Storage &operator=(const VectorType &x)
|
||||
{
|
||||
data = x;
|
||||
return *this;
|
||||
}
|
||||
|
||||
Vc_INTRINSIC Storage(const Storage &) = default;
|
||||
Vc_INTRINSIC Storage &operator=(const Storage &) = default;
|
||||
|
||||
Vc_INTRINSIC Vc_PURE VectorType &v() { return data; }
|
||||
Vc_INTRINSIC Vc_PURE const VectorType &v() const { return data; }
|
||||
|
||||
Vc_INTRINSIC_L Vc_PURE_L EntryType m(size_t i) const Vc_INTRINSIC_R Vc_PURE_R;
|
||||
Vc_INTRINSIC void set(size_t i, EntryType x) { ref(i) = x; }
|
||||
|
||||
private:
|
||||
Vc_INTRINSIC_L Vc_PURE_L EntryType &ref(size_t i) Vc_INTRINSIC_R Vc_PURE_R;
|
||||
VectorType data;
|
||||
};
|
||||
|
||||
#ifdef Vc_MSVC
|
||||
template <> Vc_INTRINSIC Vc_PURE double Storage< double, 2, AliasStrategy::UnionMembers>::m(size_t i) const { return data.m128d_f64[i]; }
|
||||
template <> Vc_INTRINSIC Vc_PURE float Storage< float , 4, AliasStrategy::UnionMembers>::m(size_t i) const { return data.m128_f32[i]; }
|
||||
template <> Vc_INTRINSIC Vc_PURE signed int Storage< signed int , 4, AliasStrategy::UnionMembers>::m(size_t i) const { return data.m128i_i32[i]; }
|
||||
template <> Vc_INTRINSIC Vc_PURE signed short Storage< signed short , 8, AliasStrategy::UnionMembers>::m(size_t i) const { return data.m128i_i16[i]; }
|
||||
template <> Vc_INTRINSIC Vc_PURE signed char Storage< signed char ,16, AliasStrategy::UnionMembers>::m(size_t i) const { return data.m128i_i8[i]; }
|
||||
template <> Vc_INTRINSIC Vc_PURE unsigned int Storage<unsigned int , 4, AliasStrategy::UnionMembers>::m(size_t i) const { return data.m128i_u32[i]; }
|
||||
template <> Vc_INTRINSIC Vc_PURE unsigned short Storage<unsigned short , 8, AliasStrategy::UnionMembers>::m(size_t i) const { return data.m128i_u16[i]; }
|
||||
template <> Vc_INTRINSIC Vc_PURE unsigned char Storage<unsigned char ,16, AliasStrategy::UnionMembers>::m(size_t i) const { return data.m128i_u8[i]; }
|
||||
|
||||
template <> Vc_INTRINSIC Vc_PURE double &Storage< double, 2, AliasStrategy::UnionMembers>::ref(size_t i) { return data.m128d_f64[i]; }
|
||||
template <> Vc_INTRINSIC Vc_PURE float &Storage< float , 4, AliasStrategy::UnionMembers>::ref(size_t i) { return data.m128_f32[i]; }
|
||||
template <> Vc_INTRINSIC Vc_PURE signed int &Storage< signed int , 4, AliasStrategy::UnionMembers>::ref(size_t i) { return data.m128i_i32[i]; }
|
||||
template <> Vc_INTRINSIC Vc_PURE signed short &Storage< signed short , 8, AliasStrategy::UnionMembers>::ref(size_t i) { return data.m128i_i16[i]; }
|
||||
template <> Vc_INTRINSIC Vc_PURE signed char &Storage< signed char ,16, AliasStrategy::UnionMembers>::ref(size_t i) { return reinterpret_cast<signed char &>(data.m128i_i8[i]); }
|
||||
template <> Vc_INTRINSIC Vc_PURE unsigned int &Storage<unsigned int , 4, AliasStrategy::UnionMembers>::ref(size_t i) { return data.m128i_u32[i]; }
|
||||
template <> Vc_INTRINSIC Vc_PURE unsigned short &Storage<unsigned short , 8, AliasStrategy::UnionMembers>::ref(size_t i) { return data.m128i_u16[i]; }
|
||||
template <> Vc_INTRINSIC Vc_PURE unsigned char &Storage<unsigned char ,16, AliasStrategy::UnionMembers>::ref(size_t i) { return data.m128i_u8[i]; }
|
||||
|
||||
#ifdef Vc_IMPL_AVX
|
||||
template <> Vc_INTRINSIC Vc_PURE double Storage< double, 4, AliasStrategy::UnionMembers>::m(size_t i) const { return data.m256d_f64[i]; }
|
||||
template <> Vc_INTRINSIC Vc_PURE float Storage< float , 8, AliasStrategy::UnionMembers>::m(size_t i) const { return data.m256_f32[i]; }
|
||||
template <> Vc_INTRINSIC Vc_PURE signed int Storage< signed int , 8, AliasStrategy::UnionMembers>::m(size_t i) const { return data.m256i_i32[i]; }
|
||||
template <> Vc_INTRINSIC Vc_PURE signed short Storage< signed short ,16, AliasStrategy::UnionMembers>::m(size_t i) const { return data.m256i_i16[i]; }
|
||||
template <> Vc_INTRINSIC Vc_PURE signed char Storage< signed char ,32, AliasStrategy::UnionMembers>::m(size_t i) const { return data.m256i_i8[i]; }
|
||||
template <> Vc_INTRINSIC Vc_PURE unsigned int Storage<unsigned int , 8, AliasStrategy::UnionMembers>::m(size_t i) const { return data.m256i_u32[i]; }
|
||||
template <> Vc_INTRINSIC Vc_PURE unsigned short Storage<unsigned short ,16, AliasStrategy::UnionMembers>::m(size_t i) const { return data.m256i_u16[i]; }
|
||||
template <> Vc_INTRINSIC Vc_PURE unsigned char Storage<unsigned char ,32, AliasStrategy::UnionMembers>::m(size_t i) const { return data.m256i_u8[i]; }
|
||||
|
||||
template <> Vc_INTRINSIC Vc_PURE double &Storage< double, 4, AliasStrategy::UnionMembers>::ref(size_t i) { return data.m256d_f64[i]; }
|
||||
template <> Vc_INTRINSIC Vc_PURE float &Storage< float , 8, AliasStrategy::UnionMembers>::ref(size_t i) { return data.m256_f32[i]; }
|
||||
template <> Vc_INTRINSIC Vc_PURE signed int &Storage< signed int , 8, AliasStrategy::UnionMembers>::ref(size_t i) { return data.m256i_i32[i]; }
|
||||
template <> Vc_INTRINSIC Vc_PURE signed short &Storage< signed short ,16, AliasStrategy::UnionMembers>::ref(size_t i) { return data.m256i_i16[i]; }
|
||||
template <> Vc_INTRINSIC Vc_PURE signed char &Storage< signed char ,32, AliasStrategy::UnionMembers>::ref(size_t i) { return reinterpret_cast<signed char &>(data.m256i_i8[i]); }
|
||||
template <> Vc_INTRINSIC Vc_PURE unsigned int &Storage<unsigned int , 8, AliasStrategy::UnionMembers>::ref(size_t i) { return data.m256i_u32[i]; }
|
||||
template <> Vc_INTRINSIC Vc_PURE unsigned short &Storage<unsigned short ,16, AliasStrategy::UnionMembers>::ref(size_t i) { return data.m256i_u16[i]; }
|
||||
template <> Vc_INTRINSIC Vc_PURE unsigned char &Storage<unsigned char ,32, AliasStrategy::UnionMembers>::ref(size_t i) { return data.m256i_u8[i]; }
|
||||
#endif
|
||||
#endif // Vc_MSVC
|
||||
|
||||
template <typename VectorType, typename EntryType>
|
||||
using VectorMemoryUnion = Storage<EntryType, sizeof(VectorType) / sizeof(EntryType)>;
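
// Illustrative note (not in the original header): the alias derives the entry
// count from the register width, e.g. with the SSE/AVX intrinsic types
//     VectorMemoryUnion<__m128 , float > == Storage<float , 4>
//     VectorMemoryUnion<__m256d, double> == Storage<double, 4>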

} // namespace Common
} // namespace Vc

#endif // VC_COMMON_STORAGE_H_

@ -0,0 +1,92 @@
/* This file is part of the Vc library. {{{
Copyright © 2014 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

///////////////////////////////////////////////////////////////////////////////////////////
// stores

/**
 * Store the vector data to \p mem.
 *
 * \param mem A pointer to memory, where \VSize{T} consecutive values will be stored.
 * \param flags The flags parameter can be used to select e.g. the Vc::Aligned,
 * Vc::Unaligned, Vc::Streaming, and/or Vc::PrefetchDefault flags.
 */
template <
    typename U,
    typename Flags = DefaultStoreTag,
    typename = enable_if<std::is_arithmetic<U>::value && Traits::is_load_store_flag<Flags>::value>>
Vc_INTRINSIC_L void store(U *mem, Flags flags = Flags()) const Vc_INTRINSIC_R;

/**
 * Store the vector data to \p mem where \p mask is set.
 *
 * \param mem A pointer to memory, where \VSize{T} consecutive values will be stored.
 * \param mask A mask object that determines which entries of the vector should be stored
 * to \p mem.
 * \param flags The flags parameter can be used to select e.g. the Vc::Aligned,
 * Vc::Unaligned, Vc::Streaming, and/or Vc::PrefetchDefault flags.
 *
 * \note
 * The masked store does not pack the values into memory. That is, the value at offset
 * \c i is stored to `mem[i]`, regardless of whether `mask[j]` is \c false for any
 * `j < i`.
 */
template <
    typename U,
    typename Flags = DefaultStoreTag,
    typename = enable_if<std::is_arithmetic<U>::value && Traits::is_load_store_flag<Flags>::value>>
Vc_INTRINSIC_L void Vc_VDECL store(U *mem, MaskType mask, Flags flags = Flags()) const Vc_INTRINSIC_R;

//@{
/**
 * The following store overloads support classes that have a cast operator to
 * `EntryType *`.
 */
Vc_INTRINSIC void store(EntryType *mem) const
{
    store<EntryType, DefaultStoreTag>(mem, DefaultStoreTag());
}

template <typename Flags, typename = enable_if<Traits::is_load_store_flag<Flags>::value>>
Vc_INTRINSIC void store(EntryType *mem, Flags flags) const
{
    store<EntryType, Flags>(mem, flags);
}

Vc_INTRINSIC void Vc_VDECL store(EntryType *mem, MaskType mask) const
{
    store<EntryType, DefaultStoreTag>(mem, mask, DefaultStoreTag());
}

template <typename Flags, typename = enable_if<Traits::is_load_store_flag<Flags>::value>>
Vc_INTRINSIC void Vc_VDECL store(EntryType *mem, MaskType mask, Flags flags) const
{
    store<EntryType, Flags>(mem, mask, flags);
}
//@}
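
// Usage sketch (illustrative, not part of the original header; assumes
// Vc::float_v and a suitably aligned buffer):
//     alignas(Vc::VectorAlignment) float buf[Vc::float_v::Size];
//     Vc::float_v v = Vc::float_v::IndexesFromZero();
//     v.store(buf, Vc::Aligned);               // unmasked, aligned store
//     v.store(buf, v > 3.f, Vc::Aligned);      // writes only buf[i] where v[i] > 3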

// vim: foldmethod=marker

@ -0,0 +1,526 @@
/* This file is part of the Vc library. {{{
Copyright © 2013-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_COMMON_SUBSCRIPT_H_
#define VC_COMMON_SUBSCRIPT_H_

#include <initializer_list>
#include <type_traits>
#include <vector>
#include "types.h"
#include "macros.h"
#include <assert.h>

namespace Vc_VERSIONED_NAMESPACE
{
namespace Common
{
// AdaptSubscriptOperator {{{
template <typename Base> class AdaptSubscriptOperator : public Base
{
public:
    // perfect forward all Base constructors
    template <typename... Args>
    Vc_ALWAYS_INLINE AdaptSubscriptOperator(Args &&... arguments)
        : Base(std::forward<Args>(arguments)...)
    {
    }

    // allow construction from std::initializer_list (the perfect-forwarding
    // constructor above does not cover this case)
    template <typename T>
    Vc_ALWAYS_INLINE AdaptSubscriptOperator(std::initializer_list<T> l)
        : Base(l)
    {
    }

    // explicitly enable Base::operator[] because the following would hide it
    using Base::operator[];

    /// \internal forward to non-member subscript_operator function
    template <typename I,
              typename = enable_if<!std::is_arithmetic<
                  typename std::decay<I>::type>::value>  // arithmetic types should
                                                         // always use Base::operator[]
                                                         // and never match this one
              >
    Vc_ALWAYS_INLINE auto operator[](I &&arg_)
        -> decltype(subscript_operator(*this, std::forward<I>(arg_)))
    {
        return subscript_operator(*this, std::forward<I>(arg_));
    }

    // const overload of the above
    template <typename I, typename = enable_if<
                              !std::is_arithmetic<typename std::decay<I>::type>::value>>
    Vc_ALWAYS_INLINE auto operator[](I &&arg_) const
        -> decltype(subscript_operator(*this, std::forward<I>(arg_)))
    {
        return subscript_operator(*this, std::forward<I>(arg_));
    }
};

// }}}
// is_valid_indexvector {{{
template <class T, class = decltype(convertIndexVector(std::declval<T>()))>
std::true_type is_valid_indexvector(T &&);
std::false_type is_valid_indexvector(...);

template <class IndexVector, class Test = decltype(is_valid_indexvector(
                                 std::declval<const IndexVector &>()))>
struct is_valid_indexvector_ : public std::integral_constant<bool, Test::value> {
};
static_assert(!is_valid_indexvector_<const int *>::value,
              "Pointer is incorrectly classified as valid index vector type");
static_assert(is_valid_indexvector_<const int[4]>::value,
              "C-Array is incorrectly classified as invalid index vector type");

// }}}
// apply Scale (std::ratio) functions {{{1
template <typename Scale, typename T>
Vc_ALWAYS_INLINE enable_if<Scale::num == Scale::den, Traits::decay<T>> applyScale(T &&x)
{
    return std::forward<T>(x);
}

template <typename Scale, typename T>
Vc_ALWAYS_INLINE enable_if<
    Scale::num != Scale::den && Traits::has_multiply_operator<T, int>::value,
    Traits::decay<T>>
    applyScale(T &&x)
{
    static_assert(Scale::num % Scale::den == 0,
                  "Non-integral index scaling requested. This typically happens only for "
                  "Vc::Scalar on 32-bit for gathers on double. You can work around the "
                  "issue by ensuring that all doubles in the structure are aligned on 8 "
                  "Bytes.");
    constexpr int value = Scale::num / Scale::den;
    Vc_ASSERT(Vc::all_of((x * value) / value == x));
    return std::forward<T>(x) * value;
}

template <typename Scale, typename T>
Vc_ALWAYS_INLINE enable_if<
    Scale::num != Scale::den && !Traits::has_multiply_operator<T, int>::value,
    T>
    applyScale(T x)
{
    static_assert(Scale::num % Scale::den == 0,
                  "Non-integral index scaling requested. This typically happens only for "
                  "Vc::Scalar on 32-bit for gathers on double. You can work around the "
                  "issue by ensuring that all doubles in the structure are aligned on 8 "
                  "Bytes.");
    constexpr int value = Scale::num / Scale::den;
    for (size_t i = 0; i < x.size(); ++i) {
        Vc_ASSERT((x[i] * value) / value == x[i]);
        x[i] *= value;
    }
    return x;
}

template <typename Scale, typename T, typename U,
          typename = enable_if<Traits::has_multiply_operator<T, int>::value &&
                               Traits::has_addition_operator<T, U>::value>>
Vc_ALWAYS_INLINE typename std::decay<T>::type applyScaleAndAdd(T &&x, U &&y)
{
    constexpr int value = Scale::num / Scale::den;
    if (value == 1) {  // static evaluation
        return std::forward<T>(x) + std::forward<U>(y);
    }
    return std::forward<T>(x) * value + std::forward<U>(y);
}

template <
    typename Scale, typename T, typename U,
    typename = enable_if<
        !(Traits::has_multiply_operator<T &, int>::value &&
          Traits::has_addition_operator<T &, decltype(std::declval<U>()[0])>::value) &&
        Traits::has_subscript_operator<U>::value>>
Vc_ALWAYS_INLINE T applyScaleAndAdd(T x, U &&y)
{
    constexpr int value = Scale::num / Scale::den;
    for (size_t i = 0; i < x.size(); ++i) {
        if (value == 1) {  // static evaluation
            x[i] = x[i] + y[i];
        } else {
            x[i] = x[i] * value + y[i];
        }
    }
    return x;
}

template <typename Scale, typename T, typename U>
Vc_ALWAYS_INLINE enable_if<!(Traits::has_multiply_operator<T &, int>::value &&
                             Traits::has_addition_operator<T &, U>::value) &&
                               !Traits::has_subscript_operator<U>::value,
                           T>
    applyScaleAndAdd(T x, U &&y)
{
    constexpr int value = Scale::num / Scale::den;
    for (size_t i = 0; i < x.size(); ++i) {
        if (value == 1) {  // static evaluation
            x[i] = x[i] + y;
        } else {
            x[i] = x[i] * value + y;
        }
    }
    return x;
}
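
// Worked example (illustrative, not part of the original header): for a gather
// on the second member of
//     struct S { float a, b; };            // sizeof(S) == 8
// the scale is carried as Scale = std::ratio<8, 4>, i.e. a factor of 2, so
// applyScale rewrites an index vector {0, 1, 2} into float offsets {0, 2, 4}.
// applyScaleAndAdd folds that scaling and a nested-subscript offset into one pass.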

// IndexVectorSizeMatches {{{1
template <std::size_t MinSize,
          typename IndexT,
          bool = Traits::is_simd_vector<IndexT>::value>
struct IndexVectorSizeMatches
    : public std::true_type  // one might expect false_type here, but for these IndexT
                             // types the size is not known at compile time. The index
                             // vector may therefore be valid; we simply cannot tell from
                             // the type alone. Only a runtime check could verify the
                             // size, so the type is accepted.
{
};

template <std::size_t MinSize, typename V>
struct IndexVectorSizeMatches<MinSize,
                              V,
                              true> : public std::integral_constant<bool, (MinSize <= V::Size)>
{
};

template <std::size_t MinSize, typename T, std::size_t ArraySize>
struct IndexVectorSizeMatches<MinSize,
                              T[ArraySize],
                              false> : public std::integral_constant<bool, (MinSize <= ArraySize)>
{
};

template <std::size_t MinSize, typename T, std::size_t ArraySize>
struct IndexVectorSizeMatches<MinSize,
                              std::array<T, ArraySize>,
                              false> : public std::integral_constant<bool, (MinSize <= ArraySize)>
{
};

template <std::size_t MinSize, typename T, std::size_t ArraySize>
struct IndexVectorSizeMatches<MinSize,
                              Vc::array<T, ArraySize>,
                              false> : public std::integral_constant<bool, (MinSize <= ArraySize)>
{
};

template <std::size_t MinSize, typename T, std::ptrdiff_t N>
struct IndexVectorSizeMatches<MinSize, Vc::Common::span<T, N>, false>
    : public std::integral_constant<bool, (N == -1 || static_cast<std::ptrdiff_t>(MinSize) <= N)> {
};
// SubscriptOperation {{{1
template <
    typename T, typename IndexVector, typename Scale = std::ratio<1, 1>,
    bool = is_valid_indexvector_<IndexVector>::value>
class SubscriptOperation
{
    const IndexVector m_indexes;
    T *const m_address;
    using ScalarType = typename std::decay<T>::type;

    using IndexVectorScaled =
        Traits::decay<decltype(convertIndexVector(std::declval<const IndexVector &>()))>;

public:
    // try to stop the user from forming lvalues of this type
    SubscriptOperation &operator=(const SubscriptOperation &) = delete;
    SubscriptOperation(const SubscriptOperation &) = delete;
#ifndef __cpp_guaranteed_copy_elision
    constexpr SubscriptOperation(SubscriptOperation &&) = default;
#endif

    template <typename U,
              typename = enable_if<((std::is_convertible<const U &, IndexVector>::value ||
                                     std::is_same<U, IndexVector>::value) &&
                                    std::is_copy_constructible<IndexVector>::value)>>
    constexpr Vc_ALWAYS_INLINE SubscriptOperation(T *address, const U &indexes)
        : m_indexes(indexes), m_address(address)
    {
    }

    template <std::size_t... Indexes>
    constexpr Vc_ALWAYS_INLINE SubscriptOperation(T *address, const IndexVector &indexes,
                                                  index_sequence<Indexes...>)
        : m_indexes{indexes[Indexes]...}, m_address(address)
    {}

    template <typename U>
    constexpr Vc_ALWAYS_INLINE SubscriptOperation(
        T *address, const U &indexes,
        enable_if<((std::is_convertible<const U &, IndexVector>::value ||
                    std::is_same<U, IndexVector>::value) &&
                   !std::is_copy_constructible<IndexVector>::value &&
                   std::is_array<IndexVector>::value &&
                   std::extent<IndexVector>::value > 0)> = nullarg)
        : SubscriptOperation(address, indexes,
                             make_index_sequence<std::extent<IndexVector>::value>())
    {
    }

    static constexpr bool need_explicit_scaling =
        Scale::num % Scale::den != 0 || Scale::num / Scale::den * sizeof(T) > 8;

    Vc_ALWAYS_INLINE
    GatherArguments<typename std::remove_cv<T>::type, IndexVectorScaled,
                    (need_explicit_scaling ? 1 : Scale::num / Scale::den)>
    gatherArguments() &&
    {
        static_assert(std::is_arithmetic<ScalarType>::value,
                      "Incorrect type for a SIMD vector gather. Must be an arithmetic type.");
        return {applyScale<typename std::conditional<need_explicit_scaling, Scale,
                                                     std::ratio<1, 1>>::type>(
                    convertIndexVector(m_indexes)),
                m_address};
    }

    Vc_ALWAYS_INLINE ScatterArguments<T, IndexVectorScaled> scatterArguments() &&
    {
        static_assert(std::is_arithmetic<ScalarType>::value,
                      "Incorrect type for a SIMD vector scatter. Must be an arithmetic type.");
        return {applyScale<Scale>(convertIndexVector(m_indexes)), m_address};
    }

    template <typename V,
              typename = enable_if<(std::is_arithmetic<ScalarType>::value &&
                                    Traits::is_simd_vector<V>::value &&
                                    IndexVectorSizeMatches<V::Size, IndexVector>::value)>>
    Vc_INTRINSIC operator V() &&
    {
        return V(static_cast<SubscriptOperation &&>(*this).gatherArguments());
    }

    template <typename V,
              typename = enable_if<(std::is_arithmetic<ScalarType>::value &&
                                    Traits::is_simd_vector<V>::value &&
                                    IndexVectorSizeMatches<V::Size, IndexVector>::value)>>
    Vc_ALWAYS_INLINE SubscriptOperation &operator=(const V &rhs) &&
    {
        static_assert(std::is_arithmetic<ScalarType>::value,
                      "Incorrect type for a SIMD vector scatter. Must be an arithmetic type.");
        const auto indexes = applyScale<Scale>(convertIndexVector(m_indexes));
        rhs.scatter(m_address, indexes);
        return *this;
    }

    // precondition: m_address points to a struct/class/union
    template <
        typename U,
        typename S,  // S must be equal to T. Still we require this template parameter;
                     // otherwise instantiation of SubscriptOperation would only be valid
                     // for structs/unions.
        typename = enable_if<std::is_same<S, typename std::remove_cv<T>::type>::value &&(
            std::is_class<T>::value || std::is_union<T>::value)>>
    Vc_ALWAYS_INLINE auto operator[](U S::*member) &&
        -> SubscriptOperation<
            typename std::conditional<std::is_const<T>::value,
                                      const typename std::remove_reference<U>::type,
                                      typename std::remove_reference<U>::type>::type,
            IndexVector,
            // By passing the scale factor as a fraction of integers in the template
            // arguments the value does not lose information if the division yields a
            // non-integral value. This could happen e.g. for a struct of struct (S2 {
            // S1, char }, with sizeof(S1) = 16, sizeof(S2) = 20; then scale would be
            // 20/16).
            std::ratio_multiply<Scale, std::ratio<sizeof(S), sizeof(U)>>>
    {
        static_assert(std::is_same<Traits::decay<decltype(m_address->*member)>,
                                   Traits::decay<U>>::value,
                      "Type mismatch that should be impossible.");
        // TODO: check whether scale really works for unions correctly
        return {&(m_address->*member), m_indexes};
    }
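
    // Usage sketch (illustrative, hypothetical type names): the member-pointer
    // subscript above is what enables gathers from arrays of structs:
    //     struct Particle { float x, y; };
    //     Vc::array<Particle, 256> ps = ...;
    //     Vc::float_v::IndexType idx = ...;          // indexes into ps
    //     Vc::float_v xs = ps[idx][&Particle::x];    // gathers ps[idx[i]].x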

    /*
     * The following functions allow subscripting of nested arrays. But
     * there are two cases of containers and only one that we want to support:
     * 1. actual arrays (e.g. T[N] or std::array<T, N>)
     * 2. dynamically allocated vectors (e.g. std::vector<T>)
     *
     * For (1.) the offset calculation is straightforward.
     * For (2.) the m_address pointer points to memory where pointers are
     * stored to the actual data. Meaning the data can be scattered
     * freely in memory (and far away from what m_address points to). Supporting this
     * leads to serious trouble with the pointer (it does not really point to the start
     * of a memory region anymore) and inefficient code. The user is better off writing
     * a loop that assigns the scalars to the vector object sequentially.
     */

private:
    // The following is a workaround for MSVC 2015 Update 2. Whenever the ratio
    // in the return type of the following operator[] is encountered with a sizeof
    // expression that fails, MSVC decides to substitute a 0 for the sizeof instead of
    // just leaving the ratio instantiation alone via proper SFINAE. The make_ratio helper
    // ensures that the 0 from the sizeof failure does not reach the denominator of
    // std::ratio where it would hit a static_assert.
    template <intmax_t N, intmax_t D> struct make_ratio {
        using type = std::ratio<N, D == 0 ? 1 : D>;
    };

public:
    // precondition: m_address points to a type that implements the subscript operator
    template <typename U>
    // U is only required to delay name lookup to the second phase (on use).
    // This is necessary because m_address[0][index] is only a correct
    // expression if has_subscript_operator<T>::value is true.
    Vc_ALWAYS_INLINE auto operator[](U index) && -> typename std::enable_if<
#ifndef Vc_IMPROVE_ERROR_MESSAGES
        Traits::has_no_allocated_data<T>::value &&
#endif
            std::is_convertible<U, size_t>::value,
        SubscriptOperation<
            // The following decltype expression must depend on index and cannot
            // simply use [0][0] because that would yield an invalid expression in
            // case m_address[0] returns a struct/union.
            typename std::remove_reference<decltype(m_address[0][index])>::type,
            IndexVector,
            std::ratio_multiply<
                Scale,
                typename make_ratio<sizeof(T), sizeof(m_address[0][index])>::type>>>::type
    {
        static_assert(Traits::has_subscript_operator<T>::value,
                      "The subscript operator was called on a type that does not implement it.\n");
        static_assert(Traits::has_no_allocated_data<T>::value,
                      "Invalid container type in gather/scatter operation.\nYou may only use "
                      "nested containers that store the data inside the object (such as builtin "
                      "arrays or std::array) but not containers that store data in allocated "
                      "memory (such as std::vector).\nSince this feature cannot be queried "
                      "generically at compile time you need to specialize the "
                      "Vc::Traits::has_no_allocated_data_impl<T> type-trait for custom types that "
                      "meet the requirements.\n");
        static_assert(std::is_lvalue_reference<decltype(m_address[0][index])>::value,
                      "The container does not return an lvalue reference to the data at "
                      "the requested offset. This makes it impossible to execute a "
                      "gather operation.\n");
        return {&(m_address[0][index]), m_indexes};
    }

    // precondition: m_address points to a type that implements the subscript operator
    template <typename IT>
    Vc_ALWAYS_INLINE typename std::enable_if<
#ifndef Vc_IMPROVE_ERROR_MESSAGES
        Traits::has_no_allocated_data<T>::value &&
            Traits::has_subscript_operator<T>::value &&
#endif
            Traits::has_subscript_operator<IT>::value,
        SubscriptOperation<
            // std::declval<const IT &>()[0] could be replaced with 0 if it were
            // not for two-phase lookup; we need to make the m_address[0][0]
            // expression dependent on IT.
            typename std::remove_reference<decltype(
                m_address[0][std::declval<const IT &>()[0]])>::type,
            IndexVectorScaled,
            std::ratio<1, 1>  // reset Scale to 1 since it is applied below
            >>::type
    operator[](const IT &index) &&
    {
        static_assert(Traits::has_subscript_operator<T>::value,
                      "The subscript operator was called on a type that does not implement it.\n");
        static_assert(Traits::has_no_allocated_data<T>::value,
                      "Invalid container type in gather/scatter operation.\nYou may only use "
                      "nested containers that store the data inside the object (such as builtin "
                      "arrays or std::array) but not containers that store data in allocated "
                      "memory (such as std::vector).\nSince this feature cannot be queried "
                      "generically at compile time you need to specialize the "
                      "Vc::Traits::has_no_allocated_data_impl<T> type-trait for custom types that "
                      "meet the requirements.\n");
        return {&(m_address[0][0]),
                applyScaleAndAdd<std::ratio_multiply<
                    Scale, std::ratio<sizeof(T), sizeof(m_address[0][0])>>>(
                    convertIndexVector(m_indexes), index)};
    }
};
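
// Usage sketch (illustrative, not part of the original header): nested
// fixed-size arrays can be subscripted before the gather is formed; a
// std::vector as the *inner* container is rejected by the
// has_no_allocated_data static_assert above:
//     Vc::array<Vc::array<float, 4>, 64> table = ...;
//     Vc::float_v::IndexType rows = ...;
//     Vc::float_v v = table[rows][2];    // gathers table[rows[i]][2]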

// specialization for invalid IndexVector type
template <typename T, typename IndexVector, typename Scale>
class SubscriptOperation<T, IndexVector, Scale, false>;

// subscript_operator {{{1
template <
    typename Container,
    typename IndexVector,
    typename = enable_if<
        // The index vector must provide operator[] for the implementations of
        // gather/scatter.
        Traits::has_subscript_operator<IndexVector>::value
        // The container must use contiguous storage, otherwise the index vector
        // cannot be used as memory offsets, which is required for efficient
        // gather/scatter implementations.
        && Traits::has_contiguous_storage<Container>::value
        // Dereferencing the begin iterator must yield an lvalue reference (const
        // or non-const); otherwise it is not possible to determine a pointer to
        // the data storage (see above).
        && std::is_lvalue_reference<decltype(*begin(std::declval<Container>()))>::value>>
Vc_ALWAYS_INLINE SubscriptOperation<
    // The type of the first value in the container is what the internal array
    // pointer has to point to. If the subscript operator of the container
    // returns a reference we need to drop that part because it is useless
    // information for us; const and volatile, as well as array rank/extent, are
    // interesting and need not be dropped.
    typename std::remove_reference<decltype(*begin(std::declval<Container>()))>::type,
    // Keep volatile and possibly the array extent, but the const and & parts of
    // the type need to be removed because SubscriptOperation explicitly adds
    // them for its member type.
    typename std::remove_const<typename std::remove_reference<IndexVector>::type>::type>
subscript_operator(Container &&c, IndexVector &&indexes)
{
    Vc_ASSERT(std::addressof(*begin(c)) + 1 ==
              std::addressof(*(begin(c) + 1)));  // runtime assertion for contiguous
                                                 // storage; this requires a
                                                 // RandomAccessIterator, but that should
                                                 // be given for a container with
                                                 // contiguous storage
    return {std::addressof(*begin(c)), std::forward<IndexVector>(indexes)};
}
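
// Usage sketch (illustrative; assumes the Vc-adapted vector container from
// <Vc/vector>): subscript_operator is what the adapted containers dispatch to,
// so gathers read naturally:
//     Vc::vector<float> data(1024);
//     Vc::float_v::IndexType idx = ...;
//     Vc::float_v v = data[idx];    // forwards to subscript_operator(data, idx)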

/**
 * \internal
 * Implement subscripts of std::initializer_list. This function must be in the global scope
 * because Container arguments may be in any scope. The other argument is in std scope.
 *
 * -----
 * std::initializer_list does not have constexpr member functions in C++11, but from C++14 onwards
 * the world is a happier place. :)
 */
template <typename Container, typename I>
Vc_ALWAYS_INLINE Vc::Common::SubscriptOperation<
    typename std::remove_reference<decltype(std::declval<Container>()[0])>::type,
    const std::initializer_list<I> &> subscript_operator(Container &&vec,
                                                         const std::initializer_list<I> &indexes)
{
    return {&vec[0], indexes};
}
//}}}1

} // namespace Common

using Common::subscript_operator;

} // namespace Vc

#endif // VC_COMMON_SUBSCRIPT_H_

// vim: foldmethod=marker

@ -0,0 +1,7 @@
#ifndef VC_DEPRECATED_COMMON_SUPPORT_H_
#define VC_DEPRECATED_COMMON_SUPPORT_H_
#ifdef __GNUC__
#warning "the <Vc/common/support.h> header is deprecated. Use <Vc/support.h> instead."
#endif
#include <Vc/support.h>
#endif // VC_DEPRECATED_COMMON_SUPPORT_H_

@ -0,0 +1,57 @@
/* This file is part of the Vc library. {{{
Copyright © 2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_COMMON_TRANSPOSE_H_
#define VC_COMMON_TRANSPOSE_H_

#include "macros.h"
#include <tuple>

namespace Vc_VERSIONED_NAMESPACE
{
namespace Common
{
template <typename... Inputs> struct TransposeProxy
{
    TransposeProxy(const Inputs &... inputs) : in{inputs...} {}

    std::tuple<const Inputs &...> in;
};

template <int LhsLength, size_t RhsLength> struct TransposeTag {
};
} // namespace Common

template <typename... Vs> Common::TransposeProxy<Vs...> transpose(Vs... vs)
{
    return {vs...};
}
} // namespace Vc

#endif // VC_COMMON_TRANSPOSE_H_

// vim: foldmethod=marker

@ -0,0 +1,226 @@
/* This file is part of the Vc library. {{{
Copyright © 2009-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_COMMON_TRIGONOMETRIC_H_
#define VC_COMMON_TRIGONOMETRIC_H_

#include "macros.h"

#ifdef Vc_HAVE_LIBMVEC
extern "C" {
__m128  _ZGVbN4v_sinf(__m128);
__m128d _ZGVbN2v_sin(__m128d);
__m128  _ZGVbN4v_cosf(__m128);
__m128d _ZGVbN2v_cos(__m128d);
__m256  _ZGVdN8v_sinf(__m256);
__m256d _ZGVdN4v_sin(__m256d);
__m256  _ZGVdN8v_cosf(__m256);
__m256d _ZGVdN4v_cos(__m256d);
}
#endif

namespace Vc_VERSIONED_NAMESPACE
{
namespace Detail
{
template<Vc::Implementation Impl> struct MapImpl { enum Dummy { Value = Impl }; };
template<> struct MapImpl<Vc::SSE42Impl> { enum Dummy { Value = MapImpl<Vc::SSE41Impl>::Value }; };

template<Vc::Implementation Impl> using TrigonometricImplementation =
    ImplementationT<MapImpl<Impl>::Value
#if defined(Vc_IMPL_XOP) && defined(Vc_IMPL_FMA4)
                    + Vc::XopInstructions
                    + Vc::Fma4Instructions
#endif
                    >;
} // namespace Detail

namespace Common
{
template<typename Impl> struct Trigonometric
{
    template<typename T> static T Vc_VDECL sin(const T &_x);
    template<typename T> static T Vc_VDECL cos(const T &_x);
    template<typename T> static void Vc_VDECL sincos(const T &_x, T *_sin, T *_cos);
    template<typename T> static T Vc_VDECL asin (const T &_x);
    template<typename T> static T Vc_VDECL atan (const T &_x);
    template<typename T> static T Vc_VDECL atan2(const T &y, const T &x);
};
} // namespace Common

#if defined Vc_IMPL_SSE || defined DOXYGEN
// this is either SSE, AVX, or AVX2
namespace Detail
{
template <typename T, typename Abi>
using Trig = Common::Trigonometric<Detail::TrigonometricImplementation<
    (std::is_same<Abi, VectorAbi::Sse>::value
         ? SSE42Impl
         : std::is_same<Abi, VectorAbi::Avx>::value ? AVXImpl : ScalarImpl)>>;
} // namespace Detail

#ifdef Vc_HAVE_LIBMVEC
Vc_INTRINSIC __m128  sin_dispatch(__m128  x) { return ::_ZGVbN4v_sinf(x); }
Vc_INTRINSIC __m128d sin_dispatch(__m128d x) { return ::_ZGVbN2v_sin (x); }
Vc_INTRINSIC __m128  cos_dispatch(__m128  x) { return ::_ZGVbN4v_cosf(x); }
Vc_INTRINSIC __m128d cos_dispatch(__m128d x) { return ::_ZGVbN2v_cos (x); }
#ifdef Vc_IMPL_AVX
Vc_INTRINSIC __m256  sin_dispatch(__m256  x) { return ::_ZGVdN8v_sinf(x); }
Vc_INTRINSIC __m256d sin_dispatch(__m256d x) { return ::_ZGVdN4v_sin (x); }
Vc_INTRINSIC __m256  cos_dispatch(__m256  x) { return ::_ZGVdN8v_cosf(x); }
Vc_INTRINSIC __m256d cos_dispatch(__m256d x) { return ::_ZGVdN4v_cos (x); }
#endif

template <typename T, typename Abi>
Vc_INTRINSIC Vector<T, detail::not_fixed_size_abi<Abi>> sin(const Vector<T, Abi> &x)
{
    return sin_dispatch(x.data());
}
template <typename T, typename Abi>
Vc_INTRINSIC Vector<T, detail::not_fixed_size_abi<Abi>> cos(const Vector<T, Abi> &x)
{
    return cos_dispatch(x.data());
}
#else
/**
 * \ingroup Math
 * Returns the sine of all input values in \p x.
 *
 * \param x The values to apply the sine function on.
 *
 * \returns the sine of \p x.
 *
 * \note The single-precision implementation has a precision of max. 2 ulp (mean 0.17 ulp)
 * in the range [-8192, 8192].
 * (testSin<float_v> with a maximal distance of 2 to the reference (mean: 0.310741))
 *
 * \note The double-precision implementation has a precision of max. 3 ulp (mean 1040 ulp)
 * in the range [-8192, 8192].
 * (testSin<double_v> with a maximal distance of 1 to the reference (mean: 0.170621))
 *
 * \note The precision and execution latency depends on:
 * - `Abi` (e.g. Scalar uses the `<cmath>` implementation)
 * - whether `Vc_HAVE_LIBMVEC` is defined
 * - for the `<cmath>` fallback, the implementations differ (e.g. MacOS vs. Linux
 *   vs. Windows; fpmath=sse vs. fpmath=387)
 *
 * \note Vc versions before 1.4 had different precision.
 */
template <typename T, typename Abi>
Vc_INTRINSIC Vector<T, detail::not_fixed_size_abi<Abi>> sin(const Vector<T, Abi> &x)
{
    return Detail::Trig<T, Abi>::sin(x);
}

/**
 * \ingroup Math
 * Returns the cosine of all input values in \p x.
 *
 * \param x The values to apply the cosine function on.
 * \returns the cosine of \p x.
 *
 * \note The single-precision implementation has a precision of max. 2 ulp (mean 0.18 ulp) in the range [-8192, 8192].
 * \note The double-precision implementation has a precision of max. 3 ulp (mean 1160 ulp) in the range [-8192, 8192].
 * \note Vc versions before 1.4 had different precision.
 */
template <typename T, typename Abi>
Vc_INTRINSIC Vector<T, detail::not_fixed_size_abi<Abi>> cos(const Vector<T, Abi> &x)
{
    return Detail::Trig<T, Abi>::cos(x);
}
#endif

/**
 * \ingroup Math
 * Returns the arcsine of all input values in \p x.
 *
 * \param x The values to apply the arcsine function on.
 * \returns the arcsine of \p x.
 *
 * \note The single-precision implementation has an error of max. 2 ulp (mean 0.3 ulp).
 * \note The double-precision implementation has an error of max. 36 ulp (mean 0.4 ulp).
 */
template <typename T, typename Abi>
Vc_INTRINSIC Vector<T, detail::not_fixed_size_abi<Abi>> asin(const Vector<T, Abi> &x)
{
    return Detail::Trig<T, Abi>::asin(x);
}

/**
 * \ingroup Math
 * Returns the arctangent of all input values in \p x.
 *
 * \param x The values to apply the arctangent function on.
 * \returns the arctangent of \p x.
 * \note The single-precision implementation has an error of max. 3 ulp (mean 0.4 ulp) in the range [-8192, 8192].
 * \note The double-precision implementation has an error of max. 2 ulp (mean 0.1 ulp) in the range [-8192, 8192].
 */
template <typename T, typename Abi>
Vc_INTRINSIC Vector<T, detail::not_fixed_size_abi<Abi>> atan(const Vector<T, Abi> &x)
{
    return Detail::Trig<T, Abi>::atan(x);
}

/**
 * \ingroup Math
 * Returns the arctangent of all input values in \p x and \p y.
 *
 * Calculates the angle given the lengths of the opposite and adjacent legs in a right
 * triangle.
 * \param y The opposite leg.
 * \param x The adjacent leg.
 * \returns the arctangent of \p y / \p x.
 */
template <typename T, typename Abi>
Vc_INTRINSIC Vector<T, detail::not_fixed_size_abi<Abi>> atan2(const Vector<T, Abi> &y,
                                                              const Vector<T, Abi> &x)
{
    return Detail::Trig<T, Abi>::atan2(y, x);
}

/**
 * \ingroup Math
 *
 * \param x Input value to both sine and cosine.
 * \param sin A non-null pointer to a potentially uninitialized object of type Vector.
 *            When \c sincos returns, `*sin` contains the result of `sin(x)`.
 * \param cos A non-null pointer to a potentially uninitialized object of type Vector.
 *            When \c sincos returns, `*cos` contains the result of `cos(x)`.
 *
 * \see sin, cos
 */
template <typename T, typename Abi>
Vc_INTRINSIC void sincos(const Vector<T, Abi> &x,
                         Vector<T, detail::not_fixed_size_abi<Abi>> *sin,
                         Vector<T, Abi> *cos)
{
    Detail::Trig<T, Abi>::sincos(x, sin, cos);
}
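
// Usage sketch (illustrative, not part of the original header): sincos computes
// both results in one call, which is typically cheaper than separate sin() and
// cos() calls:
//     Vc::float_v angle = ...;
//     Vc::float_v s, c;
//     Vc::sincos(angle, &s, &c);    // s == sin(angle), c == cos(angle)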
#endif
} // namespace Vc_VERSIONED_NAMESPACE

#endif // VC_COMMON_TRIGONOMETRIC_H_

@ -0,0 +1,402 @@
/* This file is part of the Vc library. {{{
Copyright © 2012-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_COMMON_TYPES_H_
#define VC_COMMON_TYPES_H_

#ifdef Vc_CHECK_ALIGNMENT
#include <cstdlib>
#include <cstdio>
#endif

#include <ratio>
#include "../global.h"
#include "../traits/type_traits.h"
#include "permutation.h"

namespace Vc_VERSIONED_NAMESPACE
{
///\addtogroup Utilities
///@{

/// \internal Allow writing \c size_t without the `std::` prefix.
using std::size_t;

/// long long shorthand
using llong = long long;
/// unsigned long long shorthand
using ullong = unsigned long long;
/// unsigned long shorthand
using ulong = unsigned long;
/// unsigned int shorthand
using uint = unsigned int;
/// unsigned short shorthand
using ushort = unsigned short;
/// unsigned char shorthand
using uchar = unsigned char;
/// signed char shorthand
using schar = signed char;

/**\internal
 * Tag type for explicit zero-initialization
 */
struct VectorSpecialInitializerZero {};
/**\internal
 * Tag type for explicit one-initialization
 */
struct VectorSpecialInitializerOne {};
/**\internal
 * Tag type for explicit "iota-initialization"
 */
struct VectorSpecialInitializerIndexesFromZero {};

/**
 * The special object \p Vc::Zero can be used to construct Vector and Mask objects
 * initialized to zero/\c false.
 */
constexpr VectorSpecialInitializerZero Zero = {};
/**
 * The special object \p Vc::One can be used to construct Vector and Mask objects
 * initialized to one/\c true.
 */
constexpr VectorSpecialInitializerOne One = {};
/**
 * The special object \p Vc::IndexesFromZero can be used to construct Vector objects
 * initialized to values 0, 1, 2, 3, 4, ...
 */
constexpr VectorSpecialInitializerIndexesFromZero IndexesFromZero = {};
///@}
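
// Usage sketch (illustrative, not part of the original header): the special
// objects select dedicated Vector constructors:
//     Vc::float_v zeros(Vc::Zero);              // {0, 0, 0, ...}
//     Vc::float_v ones(Vc::One);                // {1, 1, 1, ...}
//     Vc::float_v iota(Vc::IndexesFromZero);    // {0, 1, 2, ...}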

namespace Detail
{
template<typename T> struct MayAliasImpl {
#ifdef Vc_ICC
#pragma warning(disable:2621)
#endif
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wattributes"
#endif
    typedef T type Vc_MAY_ALIAS;
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
#ifdef Vc_ICC
#pragma warning(enable:2621)
#endif
};
//template<size_t Bytes> struct MayAlias<MaskBool<Bytes>> { typedef MaskBool<Bytes> type; };
} // namespace Detail
/**\internal
 * Helper MayAlias<T> that turns T into the type to be used for an aliasing pointer. This
 * adds the may_alias attribute to T (with compilers that support it). For MaskBool this
 * attribute is already part of the type, and applying it a second time leads to
 * warnings/errors; therefore MaskBool is simply forwarded as is.
 */
template <typename T> using MayAlias = typename Detail::MayAliasImpl<T>::type;

template <class To, class From> MayAlias<To> &aliasing_cast(From &x)
{
    return *reinterpret_cast<MayAlias<To> *>(&x);
}
template <class To, class From> const MayAlias<To> &aliasing_cast(const From &x)
{
    return *reinterpret_cast<const MayAlias<To> *>(&x);
}

template <class To, class From> MayAlias<To> *aliasing_cast(From *x)
{
    return reinterpret_cast<MayAlias<To> *>(x);
}
template <class To, class From> const MayAlias<To> *aliasing_cast(const From *x)
{
    return reinterpret_cast<const MayAlias<To> *>(x);
}
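
/* [Editor's sketch, not part of the original header] aliasing_cast yields a reference
 * or pointer whose type carries the may_alias attribute (where the compiler supports
 * it), e.g. to read the bit pattern of a float without a strict-aliasing violation on
 * such compilers:
 * \code
 * float f = 1.f;
 * unsigned int bits = Vc::aliasing_cast<unsigned int>(f);  // bit pattern of f
 * \endcode
 */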

/**\internal
 * This enumeration lists all possible operators in C++.
 *
 * The assignment and compound assignment enumerators are used with the conditional_assign
 * implementation.
 */
enum class Operator : char {
    Assign,
    Multiply,
    MultiplyAssign,
    Divide,
    DivideAssign,
    Remainder,
    RemainderAssign,
    Plus,
    PlusAssign,
    Minus,
    MinusAssign,
    RightShift,
    RightShiftAssign,
    LeftShift,
    LeftShiftAssign,
    And,
    AndAssign,
    Xor,
    XorAssign,
    Or,
    OrAssign,
    PreIncrement,
    PostIncrement,
    PreDecrement,
    PostDecrement,
    LogicalAnd,
    LogicalOr,
    Comma,
    UnaryPlus,
    UnaryMinus,
    UnaryNot,
    UnaryOnesComplement,
    CompareEqual,
    CompareNotEqual,
    CompareLess,
    CompareGreater,
    CompareLessEqual,
    CompareGreaterEqual
};
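
/* [Editor's sketch, not part of the original header] The compound-assignment
 * enumerators select the operation performed by the library's conditional_assign
 * helper (declared elsewhere in Vc); a hedged illustration of the intended use:
 * \code
 * Vc::float_v x, y;
 * Vc::float_m mask = x < y;
 * // add y to x only in the lanes where mask is true:
 * Vc::conditional_assign<Vc::Operator::PlusAssign>(x, mask, y);
 * \endcode
 */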

// forward declaration for Vc::array in <Vc/array>
template <typename T, std::size_t N> struct array;
// forward declaration for Vc::span in <Vc/span>
namespace Common {
template <typename T, std::ptrdiff_t N> class span;
}

/* TODO: add type for half-float, something along these lines:
class half_float
{
    uint16_t data;
public:
    constexpr half_float() : data(0) {}
    constexpr half_float(const half_float &) = default;
    constexpr half_float(half_float &&) = default;
    constexpr half_float &operator=(const half_float &) = default;

    constexpr explicit half_float(float);
    constexpr explicit half_float(double);
    constexpr explicit half_float(int);
    constexpr explicit half_float(unsigned int);

    explicit operator float () const;
    explicit operator double () const;
    explicit operator int () const;
    explicit operator unsigned int() const;

    bool operator==(half_float rhs) const;
    bool operator!=(half_float rhs) const;
    bool operator>=(half_float rhs) const;
    bool operator<=(half_float rhs) const;
    bool operator> (half_float rhs) const;
    bool operator< (half_float rhs) const;

    half_float operator+(half_float rhs) const;
    half_float operator-(half_float rhs) const;
    half_float operator*(half_float rhs) const;
    half_float operator/(half_float rhs) const;
};
*/

// TODO: the following doesn't really belong in the toplevel Vc namespace.
#ifndef Vc_CHECK_ALIGNMENT
template<typename _T> static Vc_ALWAYS_INLINE void assertCorrectAlignment(const _T *) {}
#else
template<typename _T> static Vc_ALWAYS_INLINE void assertCorrectAlignment(const _T *ptr)
{
    const size_t s = alignof(_T);
    if ((reinterpret_cast<size_t>(ptr) & ((s ^ (s & (s - 1))) - 1)) != 0) {
        fprintf(stderr, "A vector with incorrect alignment has just been created. Look at the stacktrace to find the guilty object.\n");
        abort();
    }
}
#endif
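
/* [Editor's note] The mask computation above deserves a word: `s & (s - 1)` clears the
 * lowest set bit of s, so `s ^ (s & (s - 1))` isolates that lowest set bit (equal to s
 * itself when alignof(_T) is a power of two), and subtracting 1 yields the low-bit mask
 * that a correctly aligned address must not intersect. For example, s = 16:
 * 16 & 15 == 0, 16 ^ 0 == 16, 16 - 1 == 0xF, and `ptr & 0xF` must be zero for 16-byte
 * alignment.
 */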

namespace Common
{
// defined in common/simdarrayhelper.h
template <typename T, std::size_t Pieces, std::size_t Index> struct Segment;

/**
 * \internal
 *
 * Helper interface to make m_indexes in InterleavedMemoryAccessBase behave like an
 * integer vector, except that the entries are successive entries from the given start
 * index.
 */
template<size_t StructSize> class SuccessiveEntries
{
#ifdef Vc_MSVC
    // scatterinterleavedmemory fails with garbage values in m_first if size_type is a
    // 64-bit integer type. Using a 32-bit type seems to work around the miscompilation.
    using size_type = unsigned;
#else
    using size_type = size_t;
#endif
    const size_type m_first;

public:
    typedef SuccessiveEntries AsArg;
    Vc_INTRINSIC SuccessiveEntries(size_type first) : m_first(first) {}
    Vc_INTRINSIC Vc_PURE size_type operator[](size_type offset) const
    {
        return m_first + offset * StructSize;
    }
    Vc_INTRINSIC Vc_PURE size_type data() const { return m_first; }
    Vc_INTRINSIC Vc_PURE SuccessiveEntries operator+(const SuccessiveEntries &rhs) const
    {
        return SuccessiveEntries(m_first + rhs.m_first);
    }
    Vc_INTRINSIC Vc_PURE SuccessiveEntries operator*(const SuccessiveEntries &rhs) const
    {
        return SuccessiveEntries(m_first * rhs.m_first);
    }
    Vc_INTRINSIC Vc_PURE SuccessiveEntries operator<<(size_type x) const
    {
        return {m_first << x};
    }

    friend Vc_INTRINSIC SuccessiveEntries &internal_data(SuccessiveEntries &x)
    {
        return x;
    }
    friend Vc_INTRINSIC const SuccessiveEntries &internal_data(const SuccessiveEntries &x)
    {
        return x;
    }
};
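
/* [Editor's sketch, not part of the original header] With StructSize = 3 (e.g. an
 * array-of-structs with three members), SuccessiveEntries<3>(4) behaves like the index
 * vector [4, 7, 10, ...]: entry i is m_first + i * StructSize, i.e. the offset of the
 * same member in successive structs:
 * \code
 * Vc::Common::SuccessiveEntries<3> idx(4);
 * // idx[0] == 4, idx[1] == 7, idx[2] == 10
 * \endcode
 */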

// declaration for functions in common/malloc.h
template <std::size_t alignment>
Vc_INTRINSIC_L void *aligned_malloc(std::size_t n) Vc_INTRINSIC_R;
Vc_ALWAYS_INLINE_L void free(void *p) Vc_ALWAYS_INLINE_R;

/**\internal
 * Central definition of the type combinations that convert implicitly.
 */
template <typename Mask, typename T, typename U>
using enable_if_mask_converts_implicitly =
    enable_if<(!std::is_same<Mask, Traits::decay<U>>::value &&  // that'd be the copy ctor
               Traits::is_simd_mask<U>::value && !Traits::isSimdMaskArray<U>::value &&
               Traits::is_implicit_cast_allowed_mask<
                   Traits::entry_type_of<typename Traits::decay<U>::Vector>, T>::value)>;
/**\internal
 * Central definition of the type combinations that only convert explicitly.
 */
template <typename T, typename U>
using enable_if_mask_converts_explicitly = enable_if<(
    Traits::isSimdMaskArray<U>::value ||
    (Traits::is_simd_mask<U>::value &&
     !Traits::is_implicit_cast_allowed_mask<
         Traits::entry_type_of<typename Traits::decay<U>::Vector>, T>::value))>;

/**\internal
 * Tag type for overloading on the width (\VSize{T}) of a vector.
 */
template <typename T> using WidthT = std::integral_constant<std::size_t, sizeof(T)>;

// forward declaration of MaskBool in common/maskbool.h
template <std::size_t Bytes> class MaskBool;

// forward declaration of SubscriptOperation in common/subscript.h
template <typename T, typename IndexVector, typename Scale, bool>
class SubscriptOperation;

/**
 * \internal
 * Helper type to pass along the two arguments for a gather operation.
 *
 * \tparam IndexVector Normally an integer SIMD vector, but an array or std::vector also
 *                     works (though often less efficient).
 */
template <class T, class IndexVector, int Scale = 1>
struct GatherArguments {
    static_assert(std::is_same<T, remove_cvref_t<T>>::value && !std::is_pointer<T>::value,
                  "GatherArguments expects a cv-unqualified non-ref/ptr type");
    const IndexVector indexes;
    const T *const address;
};
template <int Scale, class T, class I>
GatherArguments<T, I, Scale> make_gather(const T *m, const I &i)
{
    return {i, m};
}
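
/* [Editor's sketch, not part of the original header] make_gather only bundles the base
 * pointer and the index vector; the gather itself happens when the result is consumed,
 * e.g. by the Vector overloads declared in common/gatherinterface.h. A hedged
 * illustration:
 * \code
 * const float table[1024] = {};                 // some lookup table
 * Vc::int_v idx = Vc::int_v::IndexesFromZero(); // [0, 1, 2, ...]
 * Vc::float_v v(Vc::Common::make_gather<1>(table, idx));
 * \endcode
 */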

/**
 * \internal
 * Helper type to pass along the two arguments for a scatter operation.
 *
 * \tparam IndexVector Normally an integer SIMD vector, but an array or std::vector also
 *                     works (though often less efficient).
 */
template <typename T, typename IndexVector> struct ScatterArguments
{
    const IndexVector indexes;
    T *const address;
};

/**\internal
 * Break the recursion of the function below.
 */
template <typename I, I Begin, I End, typename F>
Vc_INTRINSIC enable_if<(Begin >= End), void> unrolled_loop(F &&)
{
}

/**\internal
 * Force the lambda \p f to be called with indexes starting from \p Begin up to
 * (excluding) \p End without compare and jump instructions, i.e. as an unrolled loop.
 */
template <typename I, I Begin, I End, typename F>
Vc_INTRINSIC Vc_FLATTEN enable_if<(Begin < End), void> unrolled_loop(F &&f)
{
    f(Begin);
    unrolled_loop<I, Begin + 1, End>(f);
}

/**\internal
 * Small simplification of the unrolled_loop call for ranges from 0 to \p Size using
 * std::size_t as the index type.
 */
template <std::size_t Size, typename F> Vc_INTRINSIC void for_all_vector_entries(F &&f)
{
    unrolled_loop<std::size_t, 0u, Size>(std::forward<F>(f));
}
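
/* [Editor's sketch, not part of the original header] Because Begin and End are template
 * parameters, the recursion is resolved entirely at compile time and each call of \p f
 * is emitted inline, e.g. to touch every lane of a vector:
 * \code
 * Vc::float_v v;
 * Vc::Common::for_all_vector_entries<Vc::float_v::size()>(
 *     [&](std::size_t i) { v[i] = static_cast<float>(i); });  // expands to Size calls
 * \endcode
 */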

} // namespace Common
} // namespace Vc

#include "vector.h"
#include "mask.h"
#include "memoryfwd.h"

#endif // VC_COMMON_TYPES_H_

// vim: foldmethod=marker

@ -0,0 +1,96 @@
/* This file is part of the Vc library. {{{
Copyright © 2014-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_COMMON_UTILITY_H_
#define VC_COMMON_UTILITY_H_

#include "macros.h"

namespace Vc_VERSIONED_NAMESPACE
{
namespace Common
{
/**
 * \internal
 * Returns the next power of 2 larger than or equal to \p x.
 */
template <size_t x, bool = (x & (x - 1)) == 0> struct NextPowerOfTwo;
template <size_t x>
struct NextPowerOfTwo<x, true> : public std::integral_constant<size_t, x> {
};
template <size_t x>
struct NextPowerOfTwo<x, false>
    : public std::integral_constant<
          size_t, NextPowerOfTwo<(x | (x >> 1) | (x >> 2) | (x >> 5)) + 1>::value> {
};
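
/* [Editor's note] The non-power-of-two branch smears some of the high bits downward and
 * adds 1; the result is strictly larger than x but never exceeds the next power of two,
 * so the recursion terminates exactly there. A few compile-time checks (hypothetical
 * client code):
 * \code
 * static_assert(Vc::Common::NextPowerOfTwo<1>::value == 1, "");
 * static_assert(Vc::Common::NextPowerOfTwo<3>::value == 4, "");
 * static_assert(Vc::Common::NextPowerOfTwo<4>::value == 4, "");
 * static_assert(Vc::Common::NextPowerOfTwo<17>::value == 32, "");
 * \endcode
 */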

/**
 * \internal
 * Enforce an upper bound on an alignment value. This is necessary because some compilers
 * implement such an upper bound and emit a warning when a larger alignment is requested.
 */
template <size_t A>
struct BoundedAlignment : public std::integral_constant<size_t,
#if defined Vc_MSVC || defined Vc_GCC
    ((A - 1) &
#ifdef Vc_MSVC
     31
#elif defined __AVX__
     255
#else
     127
#endif
     ) + 1
#else
    A
#endif
    > {
};

/**
 * \internal
 * Returns the size of the left/first SimdArray member.
 */
template <std::size_t N> static constexpr std::size_t left_size()
{
    return Common::NextPowerOfTwo<(N + 1) / 2>::value;
}
/**
 * \internal
 * Returns the size of the right/second SimdArray member.
 */
template <std::size_t N> static constexpr std::size_t right_size()
{
    return N - left_size<N>();
}
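
/* [Editor's note] A SimdArray of N elements is split into a power-of-two "left" part
 * holding at least half of the elements and a "right" part with the remainder. For
 * example, N = 7 gives left_size<7>() == NextPowerOfTwo<4>::value == 4 and
 * right_size<7>() == 3; N = 5 gives 4 and 1; N = 8 gives 4 and 4.
 */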

} // namespace Common
} // namespace Vc

#endif // VC_COMMON_UTILITY_H_

// vim: foldmethod=marker

@ -0,0 +1,857 @@
/* This file is part of the Vc library. {{{
Copyright © 2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_COMMON_VECTOR_H_
#define VC_COMMON_VECTOR_H_

#include <ratio>
#include "elementreference.h"
#include "types.h"
#include "vectorabi.h"
#include "vectortraits.h"
#include "simdarrayfwd.h"
#include "loadstoreflags.h"
#include "writemaskedvector.h"
#include "detail.h"

namespace Vc_VERSIONED_NAMESPACE
{
/**
 * \ingroup Math
 * Copies the sign(s) of \p sign to the value(s) in \p magnitude and returns the resulting
 * vector.
 *
 * \param magnitude This vector's magnitude will be used in the return vector.
 * \param sign This vector's sign bit will be used in the return vector.
 *
 * \return a value where the sign of the value equals the sign of \p sign. I.e.
 * `sign(copysign(v, r)) == sign(r)`.
 */
template <typename T, typename Abi,
          typename = enable_if<std::is_floating_point<T>::value &&
                               !detail::is_fixed_size_abi<Abi>::value>>
inline Vector<T, Abi> copysign(Vector<T, Abi> magnitude, Vector<T, Abi> sign);

/**
 * \ingroup Math
 * Extracts the exponent of each floating-point vector component.
 *
 * \param x The vector of values to extract the exponents from.
 * \return the exponent to base 2.
 *
 * This function provides efficient access to the exponent of the floating point number.
 * The returned value is a fast approximation of the base-2 logarithm. The absolute error
 * of that approximation lies in the interval [0, 1).
 *
 * Examples:
\verbatim
 value | exponent | log2
=======|==========|=======
   1.0 |        0 | 0
   2.0 |        1 | 1
   3.0 |        1 | 1.585
   3.9 |        1 | 1.963
   4.0 |        2 | 2
   4.1 |        2 | 2.036
\endverbatim
 *
 * \warning This function assumes a positive (non-zero) value. If the value is negative,
 * the sign bit will modify the returned value. An input value of zero will return the
 * bias of the floating-point representation. If you compile with Vc runtime checks, the
 * function will assert that values are greater than or equal to zero.
 *
 * You may use abs to apply this function to negative values:
 * \code
 * exponent(abs(v))
 * \endcode
 */
template <typename T, typename Abi,
          typename = enable_if<std::is_floating_point<T>::value &&
                               !detail::is_fixed_size_abi<Abi>::value>>
inline Vector<T, Abi> exponent(Vector<T, Abi> x);

/**
 * \ingroup Math
 * Returns for each vector component whether it stores a negative value.
 *
 * \param x The vector of values to check for the sign.
 * \returns a mask which is \c true only in those components that are negative in \p x.
 */
template <typename T, typename Abi>
Vc_INTRINSIC Vc_CONST typename Vector<T, detail::not_fixed_size_abi<Abi>>::MaskType
isnegative(Vector<T, Abi> x)
{
    return x < Vector<T, Abi>::Zero();
}

/**
 * \class Vector types.h <Vc/vector.h>
 * \ingroup Vectors
 *
 * The main vector class for expressing data parallelism.
 *
 * The concrete vector types (Vc::float_v, Vc::double_v, etc.; see below)
 * are specializations of this class.
 * For most cases there are no API differences between the specializations.
 * Use Vector<T> for generic programming; otherwise you may prefer to use
 * the \p *_v aliases.
 *
 * \see Vc::float_v, Vc::double_v, Vc::int_v, Vc::uint_v, Vc::short_v, Vc::ushort_v
 * \see Mask
 */
template<typename T, typename Abi = VectorAbi::Best<T>> class Vector
{
public:
    /**
     * Returns the number of scalar components (\VSize{T}) in a vector of this type.
     *
     * The size of the vector, i.e. the number of scalar entries in the vector. Do not
     * make any assumptions about the size of vectors: if you need \c float and \c int
     * vectors of equal width, use Vector::IndexType or SimdArray.
     *
     * You can easily use if clauses to compare Vector sizes. The compiler can
     * statically evaluate them and fully optimize dead code away (very much like
     * \#ifdef, but with syntax checking).
     *
     * \returns The number of components (i.e. \VSize{T}) objects of this vector type
     *          store and manipulate.
     */
    static constexpr size_t size() { return VectorTraits<T, Abi>::size(); }

    /**
     * Specifies the alignment requirement for aligned load and store calls for objects
     * of this vector type.
     */
    static constexpr size_t MemoryAlignment = VectorTraits<T, Abi>::memoryAlignment();

    /// The ABI tag type of the current template instantiation.
    using abi = Abi;

    /// The type of the entries in the vector.
    using EntryType = typename VectorTraits<T, Abi>::EntryType;
    /// \copydoc EntryType
    using value_type = EntryType;

    using VectorEntryType = typename VectorTraits<T, Abi>::VectorEntryType;
    /**\internal
     * This type reveals the implementation-specific type used for the data member.
     */
    using VectorType = typename VectorTraits<T, Abi>::VectorType;
    /**\internal
     * \copydoc VectorType
     */
    using vector_type = VectorType;

    /// The type of the mask used for masked operations and returned from comparisons.
    using MaskType = Vc::Mask<T, Abi>;
    /// \copydoc MaskType
    using mask_type = MaskType;

    using MaskArgument = MaskType;
    using VectorArgument = Vector;

    /// The type of the vector used for indexes in gather and scatter operations.
    using IndexType = Vc::fixed_size_simd<int, VectorTraits<T, Abi>::size()>;
    /// \copydoc IndexType
    using index_type = IndexType;

    using reference = Detail::ElementReference<Vector>;

    /// \name Generators
    ///@{
    /**
     * Returns a vector with the entries initialized to zero.
     */
    static inline Vector Zero();

    /**
     * Returns a vector with the entries initialized to one.
     */
    static inline Vector One();

    /**
     * Returns a vector with the entries initialized to 0, 1, 2, 3, 4, 5, ...
     */
    static inline Vector IndexesFromZero();

    /**
     * Returns a vector with pseudo-random entries.
     *
     * Currently the state of the random number generator cannot be modified and starts
     * off with the same state. Thus you will get the same sequence of numbers for the
     * same sequence of calls.
     *
     * \return a new random vector. Floating-point values will be in the 0-1 range.
     * Integers will use the full range the integer representation allows.
     *
     * \note This function may use a very small amount of state and thus will be a weak
     * random number generator.
     */
    static inline Vector Random();

    /// Generate a vector object from return values of \p gen (static variant of \ref fill).
    template <typename G> static inline Vector generate(G gen);
    ///@}
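
    /* [Editor's sketch, not part of the original header] Example use of the generators
     * (hypothetical client code):
     * \code
     * using Vc::float_v;
     * float_v zeros = float_v::Zero();
     * float_v iota  = float_v::IndexesFromZero();  // [0, 1, 2, ...]
     * float_v sq    = float_v::generate([](size_t i) { return float(i * i); });
     * \endcode
     */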

    /// \name Compile-Time Constant Initialization
    ///@{
    /**
     * Construct a zero-initialized vector object.
     *
     * This constructor follows the behavior of the underlying arithmetic type \p T in
     * that the expression `T()` zero-initializes the object. On the other hand, the
     * variable \c x in `T x;` is uninitialized.
     * Since, for class types, both expressions call the default constructor,
     * `Vector<T> x` must zero-initialize \c x as well.
     */
    inline Vector() = default;

    /**
     * Construct a vector with the entries initialized to zero.
     *
     * \see Vc::Zero, Zero()
     */
    explicit inline Vector(VectorSpecialInitializerZero);

    /**
     * Construct a vector with the entries initialized to one.
     *
     * \see Vc::One, One()
     */
    explicit inline Vector(VectorSpecialInitializerOne);

    /**
     * Construct a vector with the entries initialized to 0, 1, 2, 3, 4, 5, ...
     *
     * \see Vc::IndexesFromZero, IndexesFromZero()
     */
    explicit inline Vector(VectorSpecialInitializerIndexesFromZero);
    ///@}

    /// \name Conversion/Broadcast Constructors
    ///@{
    /**
     * Implicit conversion from compatible Vector<U, Abi> types.
     */
    template <typename U>
    inline Vector(Vector<U, abi> x,
                  enable_if<Traits::is_implicit_cast_allowed<U, T>::value> = nullarg);

#if Vc_IS_VERSION_1
    /**
     * Explicit conversion (i.e. `static_cast`) from the remaining Vector<U, Abi> types.
     *
     * \param x A vector object to use for initialization of the new vector object. If \p
     *          x contains more entries than the new object the high components will be
     *          ignored. If \p x contains fewer entries than the new object the high
     *          components of the new object will be zero-initialized. Type conversion is
     *          done according to the standard conversion rules for the underlying
     *          fundamental arithmetic types.
     */
    template <typename U>
    Vc_DEPRECATED("use simd_cast instead of explicit type casting to convert between "
                  "vector types") inline explicit Vector(
        Vector<U, abi> x,
        enable_if<!Traits::is_implicit_cast_allowed<U, T>::value> = nullarg);
#endif

    /**
     * Broadcast Constructor.
     *
     * Constructs a vector with all entries of the vector filled with the given value.
     *
     * \param a The scalar value to broadcast to all entries of the constructed vector.
     */
    inline Vector(EntryType a);
    template <typename U>
    inline Vector(U a, enable_if<std::is_same<U, int>::value &&
                                 !std::is_same<U, EntryType>::value> = nullarg);
    inline explicit Vector(reference a);
    ///@}

    /**
     * \name Loads & Stores
     */
    ///@{
#include "../common/loadinterface.h"
#include "../common/storeinterface.h"
    ///@}

    /**
     * Set all entries to zero.
     */
    inline void setZero();

    /**
     * Set all entries to zero where the mask is set.
     *
     * A 4-vector with a mask of `[0111]` therefore would set the last three entries to 0.
     *
     * \param mask Selects the entries to be set to zero.
     */
    inline void setZero(MaskType mask);

    /**
     * Set all entries to zero where the mask is not set.
     *
     * A 4-vector with a mask of `[0111]` therefore would set only the first entry to 0.
     *
     * \param mask Selects the entries to not be set to zero.
     */
    inline void setZeroInverted(MaskType mask);

    /**
     * Set all entries to the bit representation of a QNaN.
     */
    inline void setQnan();

    /**
     * Set all entries to the bit representation of a QNaN where the mask is set.
     *
     * \param mask Selects the entries to be set to QNaN.
     */
    inline void setQnan(MaskType mask);

#define Vc_CURRENT_CLASS_NAME Vector
#include "../common/gatherinterface.h"
#include "../common/scatterinterface.h"
#undef Vc_CURRENT_CLASS_NAME

    /// \name Scalar Subscript Operators
    ///@{
    /**
     * This operator can be used to modify scalar entries of the vector.
     *
     * \param index A value between 0 and Size. This value is not checked internally, so
     *              you must make sure it is in range.
     *
     * \return a reference to the vector entry at the given \p index.
     *
     * \warning The use of this function may result in suboptimal performance. Please
     *          check whether you can find a more vector-friendly way to do what you
     *          intended.
     * \note The returned object models the concept of a reference and
     *       as such it can exist longer than the data it is referencing.
     * \note To avoid lifetime issues, we strongly advise not to store
     *       any reference objects.
     */
    inline reference operator[](size_t index) noexcept;
    /**
     * This operator can be used to read scalar entries of the vector.
     *
     * \param index A value between 0 and Size. This value is not checked internally, so
     *              you must make sure it is in range.
     *
     * \return a copy of the vector entry at the given \p index.
     */
    inline EntryType operator[](size_t index) const noexcept;
    ///@}

    /// \name Unary Operators
    ///@{
    /**
     * Determine where the vector is null.
     *
     * \returns a mask which denotes the zero entries of this vector object.
     */
    inline MaskType operator!() const;

    /**
     * Inverts all bits.
     *
     * \returns a new vector which has all bits inverted. I.e. `v & ~v == 0`.
     *
     * \note This operator is only defined for integral types \p T.
     */
    inline Vector operator~() const;

    /// Returns a new vector object with all entries negated.
    inline Vector operator-() const;
    /// Returns a copy of the vector object.
    inline Vector operator+() const;
    ///@}

    /**
     * \name Increment and Decrement Operators
     * The increment and decrement operators apply the increment/decrement operation per
     * component.
     *
     * The semantics are equal to the semantics of the fundamental arithmetic type \p T.
     *
     * \note Over-/Underflow of signed integral types is undefined behavior and may
     * actually break your code.
     */
    ///@{
    inline Vector &operator++();    // prefix
    inline Vector operator++(int);  // postfix
    inline Vector &operator--();    // prefix
    inline Vector operator--(int);  // postfix
    ///@}

#define Vc_OP(symbol) \
    inline Vc_PURE Vector operator symbol(const Vector &x) const;
    /**
     * \name Arithmetic Operations
     *
     * The arithmetic operations are implemented as component-wise
     * application of the operator on the two vector objects.
     *
     * Example:
     * \code
     * void foo(float_v a, float_v b) {
     *     const float_v product = a * b;
     *     const float_v difference = a - b;
     *     a += b;
     *     auto quotient = a / b;
     *     auto modulo = static_cast<int_v>(a) % static_cast<int_v>(b);
     * }
     * \endcode
     *
     * \param x The vector to add, subtract, multiply, or divide by.
     * \returns A vector object of the same type with the components filled according to a
     * component-wise application of the operator.
     *
     * \note If a signed integral vector operation overflows the result is undefined,
     * which is in agreement with the behavior of the fundamental signed integral types
     * in C++.
     */
    ///@{
    Vc_ALL_ARITHMETICS(Vc_OP);
    ///@}

    /**
     * \name Binary Operations
     *
     * The binary operations are implemented as component-wise
     * application of the operator on the two vector objects.
     *
     * Example:
     * \code
     * void foo(int_v a, int_v b) {
     *     const int_v combined_bits = a | b;
     *     const int_v masked_bits = a & b;
     *     a ^= b;  // flipped bits
     * }
     * \endcode
     *
     * \returns A vector object of the same type with the components filled according to a
     * component-wise application of the operator.
     */
    ///@{
    Vc_ALL_BINARY(Vc_OP);
    ///@}

    /**
     * \name Shift Operations
     *
     * The shift operations are implemented as component-wise
     * application of the operator on the two vector objects.
     *
     * Example:
     * \code
     * void foo(int_v a, int_v b) {
     *     const int_v right = a >> b;
     *     a <<= b;
     * }
     * \endcode
     *
     * \returns A vector object of the same type with the components filled according to a
     * component-wise application of the operator.
     */
    ///@{
    Vc_ALL_SHIFTS(Vc_OP);
    ///@}
#undef Vc_OP

    /**
     * \name Comparisons
     *
     * All comparison operators return a mask object.
     *
     * Example:
     * \code
     * void foo(const float_v &a, const float_v &b) {
     *     const float_m mask = a < b;
     *     ...
     * }
     * \endcode
     *
     * \param x The vector to compare against.
     * \returns A mask object. Its components contain the boolean results of the
     * component-wise compare operation.
     */
    ///@{
#define Vc_CMP_OP(symbol) inline Vc_PURE MaskType operator symbol(const Vector &x) const;
    Vc_ALL_COMPARES(Vc_CMP_OP);
#undef Vc_CMP_OP
    ///@}

    /**
     * Writemask the vector before an assignment.
     *
     * \param mask The writemask to be used.
     *
     * \return an object that can be used for any kind of masked assignment.
     *
     * The returned object is only to be used for assignments and should not be assigned
     * to a variable.
     *
     * Examples:
     * \code
     * float_v v = float_v::Zero();         // v  = [0, 0, 0, 0]
     * int_v v2 = int_v::IndexesFromZero(); // v2 = [0, 1, 2, 3]
     * v(v2 < 2) = 1.f;                     // v  = [1, 1, 0, 0]
     * v(v2 < 3) += 1.f;                    // v  = [2, 2, 1, 0]
     * ++v2(v < 1.f);                       // v2 = [0, 1, 2, 4]
     * \endcode
     */
    inline Common::WriteMaskedVector<Vector, MaskType> operator()(MaskType mask);

    /**
     * \name Horizontal Reduction Operations
     *
     * Horizontal operations can be used to reduce the values of a vector to a scalar
     * value.
     *
     * Example:
     * \code
     * void foo(const float_v &v) {
     *     float min = v.min(); // smallest value in v
     *     float sum = v.sum(); // sum of all values in v
     * }
     * \endcode
     */
    ///@{

    /// Returns the smallest entry in the vector.
    inline EntryType min() const;
    /// Returns the largest entry in the vector.
    inline EntryType max() const;
    /// Returns the product of all entries in the vector.
    inline EntryType product() const;
    /// Returns the sum of all entries in the vector.
    inline EntryType sum() const;
    /// Returns a vector containing the sum of all entries with smaller index.
    inline Vector partialSum() const;
    /// Returns the smallest entry of the vector components selected by \p mask.
    inline EntryType min(MaskType mask) const;
    /// Returns the largest entry of the vector components selected by \p mask.
    inline EntryType max(MaskType mask) const;
    /// Returns the product of the vector components selected by \p mask.
    inline EntryType product(MaskType mask) const;
    /// Returns the sum of the vector components selected by \p mask.
    inline EntryType sum(MaskType mask) const;
    ///@}

    /**
     * \name Shift and Rotate
     *
     * These functions allow you to shift or rotate the entries in a vector.
     *
     * All functions with an \p amount parameter support positive and negative numbers for
     * the shift/rotate value.
     *
     * Example:
     * \code
     * using namespace Vc;
     * int_v foo = int_v::IndexesFromZero() + 1; // e.g. [1, 2, 3, 4] with SSE
     * int_v x;
     * x = foo.shifted( 1); // [2, 3, 4, 0]
     * x = foo.shifted( 2); // [3, 4, 0, 0]
     * x = foo.shifted( 3); // [4, 0, 0, 0]
     * x = foo.shifted( 4); // [0, 0, 0, 0]
     * x = foo.shifted(-1); // [0, 1, 2, 3]
     * x = foo.shifted(-2); // [0, 0, 1, 2]
     * x = foo.shifted(-3); // [0, 0, 0, 1]
     * x = foo.shifted(-4); // [0, 0, 0, 0]
     *
     * x = foo.rotated( 1); // [2, 3, 4, 1]
     * x = foo.rotated( 2); // [3, 4, 1, 2]
     * x = foo.rotated( 3); // [4, 1, 2, 3]
     * x = foo.rotated( 4); // [1, 2, 3, 4]
     * x = foo.rotated(-1); // [4, 1, 2, 3]
     * x = foo.rotated(-2); // [3, 4, 1, 2]
     * x = foo.rotated(-3); // [2, 3, 4, 1]
     * x = foo.rotated(-4); // [1, 2, 3, 4]
     * \endcode
     *
     * These functions are loosely related to swizzles. In any case, they are
     * often useful for communication between SIMD lanes or binary decoding operations.
     *
     * \warning Use of these functions leads to less portable code. Consider the scalar
     * implementation, where every vector has only one entry. The shift and rotate
     * functions have no useful task to fulfil there and you will almost certainly not get
     * any useful results. It is recommended to add a static_assert for the assumed
     * minimum vector size.
     */
    ///@{

    /// Shift vector entries to the left by \p amount; shifting in zeros.
    inline Vector shifted(int amount) const;
    /**
     * Shift vector entries to the left by \p amount; shifting in values from \p shiftIn
     * (instead of zeros).
     *
     * This function can be used to create vectors from unaligned memory locations.
     *
     * Example:
     * \code
     * Vc::Memory<int_v, 256> mem;
     * for (int i = 0; i < 256; ++i) { mem[i] = i + 1; }
     * int_v a = mem.vectorAt(0);
     * int_v b = mem.vectorAt(int_v::Size);
     * int_v x = a.shifted(1, b);
     * // now x == mem.vectorAt(1, Vc::Unaligned)
     * \endcode
     *
     * \param amount  The number of entries to shift by. \p amount must be between \c
     *                -Size and \c Size, otherwise the result is undefined.
     * \param shiftIn The vector of values to shift in.
     * \return A new vector with values from \p this and \p shiftIn concatenated
     *         and then shifted by \p amount.
     */
    inline Vector shifted(int amount, Vector shiftIn) const;
    /// Rotate vector entries to the left by \p amount.
    inline Vector rotated(int amount) const;
    /// Returns a vector with all components reversed.
    inline Vector reversed() const;
    ///@}

    /**
     * Return a sorted copy of the vector.
     *
     * \returns a sorted vector. The returned values are in ascending order:
    \verbatim
    v[0] <= v[1] <= v[2] <= v[3] ...
    \endverbatim
     *
     * \note If the vector contains NaNs the result is undefined.
     *
     * Example:
     * \code
     * int_v v = int_v::Random();
     * int_v s = v.sorted();
     * std::cout << v << '\n' << s << '\n';
     * \endcode
     *
     * With SSE the output would be:
     *
    \verbatim
    [1513634383, -963914658, 1763536262, -1285037745]
    [-1285037745, -963914658, 1513634383, 1763536262]
    \endverbatim
     *
     * With the Scalar implementation:
    \verbatim
    [1513634383]
    [1513634383]
    \endverbatim
     */
    inline Vector sorted() const;

    /*!
     * \name Apply/Call/Fill Functions
     *
     * There are still many situations where the code needs to switch from SIMD operations
     * to scalar execution. In this case you can, of course, rely on operator[]. But there
     * are also a number of functions that can help with common patterns.
     *
     * The apply functions expect a function that returns a scalar value, i.e. a function
     * of the form "T f(T)". The call functions do not return a value, and thus the
     * function passed does not need a return value. The fill functions are used to
     * serially set the entries of the vector from the return values of a function.
     *
     * Example:
     * \code
     * void foo(float_v v) {
     *     float_v logarithm = v.apply(std::log);
     *     float_v exponential = v.apply(std::exp);
     * }
     * \endcode
     *
     * Of course, you can also use lambdas here:
     * \code
     * float_v power = v.apply([](float f) { return std::pow(f, 0.6f); });
     * \endcode
     *
     * \param f A functor: this can either be a function or an object that implements
     * operator().
     */
    ///@{

    /// Call \p f sequentially, starting with the minimum up to the maximum value.
    template <typename F> void callWithValuesSorted(F &&f);
    /// Call \p f with the scalar entries of the vector.
    template <typename F> inline void call(F &&f) const;
    /// As above, but skip the entries where \p mask is not set.
    template <typename F> inline void call(F &&f, MaskType mask) const;

    /// Call \p f on every entry of the vector and return the results as a new vector.
    template <typename F> inline Vector apply(F &&f) const;
    /// As above, but skip the entries where \p mask is not set.
    template <typename F> inline Vector apply(F &&f, MaskType mask) const;

    /// Fill the vector with the values [f(0), f(1), f(2), ...].
    template <typename IndexT> inline void fill(EntryType (&f)(IndexT));
    /// Fill the vector with the values [f(), f(), f(), ...].
    inline void fill(EntryType (&f)());
    ///@}

    /**\internal
     * Interleaves this vector and \p x and returns the resulting low vector.
     * Used to implement Vc::interleave.
     */
    inline Vector interleaveLow(Vector x) const;
    /**\internal
     * Interleaves this vector and \p x and returns the resulting high vector.
     * Used to implement Vc::interleave.
     */
    inline Vector interleaveHigh(Vector x) const;

    /**\internal
     * Assigns the components of \p v where \p m is \c true.
     */
    inline void assign(const Vector &v, const MaskType &m);

    /**
     * \internal
     * \name Internal Data Access
     * Returns a (const) reference to the internal data member storing the vector data.
     */
    ///@{
    inline VectorType &data();
    inline const VectorType &data() const;
    ///@}

    /// \name Deprecated Members
    ///@{

    /**
     * Returns the exponents of the floating-point values in the vector.
     *
     * \return A new vector object of the same type containing the exponents.
     *
     * \deprecated use Vc::exponent instead.
     */
    Vc_DEPRECATED("use exponent(x) instead") inline Vector exponent() const;

    /**
     * Returns whether a value is negative.
     *
     * \return A new mask object indicating the sign of each vector element.
     *
     * \deprecated use Vc::isnegative instead.
     */
    Vc_DEPRECATED("use isnegative(x) instead") inline MaskType isNegative() const;

    ///\copydoc size
    ///\deprecated Use Vc::Vector::size instead.
    static constexpr size_t Size = VectorTraits<T, Abi>::size();

    /**
     * Casts the current object to \p V2.
     *
     * \returns a converted object of type \p V2.
     *
     * \deprecated Use Vc::simd_cast instead.
     */
    template <typename V2> inline V2 staticCast() const;

    /**
     * reinterpret_cast the vector components to construct a vector of type \p V2.
     *
     * \returns An object of type \p V2 with the same bit representation.
     *
     * \deprecated use Vc::reinterpret_components_cast instead.
     */
    template <typename V2>
    Vc_DEPRECATED("use reinterpret_components_cast instead") inline V2
        reinterpretCast() const;

    /**
     * Copies the signs of the components of \p reference to the components of the current
     * vector, returning the result.
     *
     * \param reference A vector object that determines the sign of the result.
     * \returns A new vector with sign taken from \p reference and absolute value taken
     *          from the current vector object.
     *
     * \deprecated Use Vc::copysign instead.
     */
    Vc_DEPRECATED("use copysign(x, y) instead") inline Vector
        copySign(Vector reference) const;
    ///@}

    Vc_FREE_STORE_OPERATORS_ALIGNED(alignof(Vector));

private:
    VectorType d;
};

/**
 * \ingroup Utilities
 * Constructs a new Vector object of type \p V from the Vector \p x, reinterpreting the
 * bits of \p x for the new type \p V.
 *
 * This function is only applicable if:
 * - the \c sizeof of the input and output types is equal
 * - the Vector::size() of the input and output types is equal
 * - the \c VectorEntryTypes of input and output have equal \c sizeof
 *
 * \tparam V The requested type to change \p x into.
 * \param x The Vector to reinterpret as an object of type \p V.
 * \returns A new object (rvalue) of type \p V.
 *
 * \warning This cast is non-portable since the applicability (see above) may change
 * depending on the default vector types of the target platform. The function is perfectly
 * safe to use with fully specified \p Abi, though.
 */
template <typename V, typename T, typename Abi>
Vc_ALWAYS_INLINE Vc_CONST enable_if<
    (V::size() == Vector<T, Abi>::size() &&
     sizeof(typename V::VectorEntryType) ==
         sizeof(typename Vector<T, Abi>::VectorEntryType) &&
     sizeof(V) == sizeof(Vector<T, Abi>) && alignof(V) <= alignof(Vector<T, Abi>)),
    V>
reinterpret_components_cast(const Vector<T, Abi> &x)
{
    return reinterpret_cast<const V &>(x);
}
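
/* [Editor's sketch, not part of the original header] Example: reinterpret a float
 * vector as a 32-bit integer vector of the same width to inspect the IEEE-754 bit
 * patterns (valid wherever float_v and int_v have the same size and entry width, e.g.
 * SSE or AVX2):
 * \code
 * Vc::float_v f = Vc::float_v::One();
 * auto bits = Vc::reinterpret_components_cast<Vc::int_v>(f);
 * // each lane of bits now holds 0x3f800000
 * \endcode
 */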

#define Vc_OP(symbol) \
    template <typename T, typename Abi> \
    inline Vector<T, Abi> &operator symbol##=(Vector<T, Abi> &, \
                                              const Vector<T, Abi> &x);
//Vc_ALL_ARITHMETICS(Vc_OP);
//Vc_ALL_BINARY(Vc_OP);
//Vc_ALL_SHIFTS(Vc_OP);
#undef Vc_OP

} // namespace Vc

#endif // VC_COMMON_VECTOR_H_

// vim: foldmethod=marker