/*
 * kmp_atomic.cpp -- ATOMIC implementation routines
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "kmp_atomic.h"
#include "kmp.h" // TRUE, asm routines prototypes

typedef unsigned char uchar;
typedef unsigned short ushort;
// ...

/*
 * Global vars
 */

#ifndef KMP_GOMP_COMPAT
int __kmp_atomic_mode = 1; // Intel perf
#else
int __kmp_atomic_mode = 2; // GOMP compatibility
#endif /* KMP_GOMP_COMPAT */

KMP_ALIGN(128)

// Control access to all user coded atomics in Gnu compat mode
kmp_atomic_lock_t __kmp_atomic_lock;
// Control access to all user coded atomics for 1-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_1i;
// Control access to all user coded atomics for 2-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_2i;
// Control access to all user coded atomics for 4-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_4i;
// Control access to all user coded atomics for kmp_real32 data type
kmp_atomic_lock_t __kmp_atomic_lock_4r;
// Control access to all user coded atomics for 8-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_8i;
// Control access to all user coded atomics for kmp_real64 data type
kmp_atomic_lock_t __kmp_atomic_lock_8r;
// Control access to all user coded atomics for float complex data type
kmp_atomic_lock_t __kmp_atomic_lock_8c;
// Control access to all user coded atomics for long double data type
kmp_atomic_lock_t __kmp_atomic_lock_10r;
// Control access to all user coded atomics for _Quad data type
kmp_atomic_lock_t __kmp_atomic_lock_16r;
// Control access to all user coded atomics for double complex data type
kmp_atomic_lock_t __kmp_atomic_lock_16c;
// Control access to all user coded atomics for long double complex type
kmp_atomic_lock_t __kmp_atomic_lock_20c;
// Control access to all user coded atomics for _Quad complex data type
kmp_atomic_lock_t __kmp_atomic_lock_32c;

/* 2007-03-02:
   Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a bug
   on *_32 and *_32e. This is just a temporary workaround for the problem. It
   seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG routines
   in assembler language. */
#define KMP_ATOMIC_VOLATILE volatile
#if (KMP_ARCH_X86) && KMP_HAVE_QUAD

static inline void operator+=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  lhs.q += rhs.q;
}
static inline void operator-=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  lhs.q -= rhs.q;
}
static inline void operator*=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  lhs.q *= rhs.q;
}
static inline void operator/=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  lhs.q /= rhs.q;
}
static inline bool operator<(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q < rhs.q;
}
static inline bool operator>(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q > rhs.q;
}

static inline void operator+=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  lhs.q += rhs.q;
}
static inline void operator-=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  lhs.q -= rhs.q;
}
static inline void operator*=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  lhs.q *= rhs.q;
}
static inline void operator/=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  lhs.q /= rhs.q;
}
static inline bool operator<(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q < rhs.q;
}
static inline bool operator>(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q > rhs.q;
}

static inline void operator+=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
  lhs.q += rhs.q;
}
static inline void operator-=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
  lhs.q -= rhs.q;
}
static inline void operator*=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
  lhs.q *= rhs.q;
}
static inline void operator/=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
  lhs.q /= rhs.q;
}

static inline void operator+=(kmp_cmplx128_a16_t &lhs,
                              kmp_cmplx128_a16_t &rhs) {
  lhs.q += rhs.q;
}
static inline void operator-=(kmp_cmplx128_a16_t &lhs,
                              kmp_cmplx128_a16_t &rhs) {
  lhs.q -= rhs.q;
}
static inline void operator*=(kmp_cmplx128_a16_t &lhs,
                              kmp_cmplx128_a16_t &rhs) {
  lhs.q *= rhs.q;
}
static inline void operator/=(kmp_cmplx128_a16_t &lhs,
                              kmp_cmplx128_a16_t &rhs) {
  lhs.q /= rhs.q;
}

#endif // (KMP_ARCH_X86) && KMP_HAVE_QUAD

// ATOMIC implementation routines -----------------------------------------
// One routine for each operation and operand type.
// All routine declarations look like
// void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );

#define KMP_CHECK_GTID \
  if (gtid == KMP_GTID_UNKNOWN) { \
    gtid = __kmp_entry_gtid(); \
  } // check and get gtid when needed

// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
//           fixed)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
#define ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
                                             TYPE *lhs, TYPE rhs) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));

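// For illustration only: a hypothetical, manually expanded instance of
// ATOMIC_BEGIN (assuming TYPE_ID=fixed4, OP_ID=add, TYPE=kmp_int32,
// RET_TYPE=void). This is a sketch of what the preprocessor produces; the
// closing brace is supplied by the instantiating macro (e.g. ATOMIC_FIXED_ADD).
#if 0
void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid, kmp_int32 *lhs,
                              kmp_int32 rhs) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  KA_TRACE(100, ("__kmpc_atomic_fixed4_add: T#%d\n", gtid));
  // ... body supplied by the instantiating macro ...
#endif
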
// ------------------------------------------------------------------------
// Lock variables used for critical sections for various size operands
#define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
#define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
#define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
#define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
#define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
#define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
#define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
#define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
#define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
#define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
#define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
#define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
#define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex

// ------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
 \
  (*lhs) OP(rhs); \
 \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

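// For illustration only: a hypothetical expansion of OP_CRITICAL(+=, 4i)
// inside a generated routine body. The symbols are the real runtime names;
// the particular instantiation is just an example.
#if 0
__kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid); // serialize updates
(*lhs) += (rhs); // the actual update, protected by the lock
__kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid);
#endif
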
// ------------------------------------------------------------------------
// For GNU compatibility, we may need to use a critical section,
// even though it is not required by the ISA.
//
// On IA-32 architecture, all atomic operations except for fixed 4 byte add,
// sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
// critical section. On Intel(R) 64, all atomic operations are done with fetch
// and add or compare and exchange. Therefore, the FLAG parameter to this
// macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions
// which require a critical section, where we predict that they will be
// implemented in the Gnu codegen by calling GOMP_atomic_start() /
// GOMP_atomic_end()).
//
// When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
// the FLAG parameter should always be 1. If we know that we will be using
// a critical section, then we want to make certain that we use the generic
// lock __kmp_atomic_lock to protect the atomic update, and not one of the
// locks that are specialized based upon the size or type of the data.
//
// If FLAG is 0, then we are relying on dead code elimination by the build
// compiler to get rid of the useless block of code, and save a needless
// branch at runtime.

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL(OP, 0); \
    return; \
  }
#else
#define OP_GOMP_CRITICAL(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

#if KMP_MIC
#define KMP_DO_PAUSE _mm_delay_32(1)
#else
#define KMP_DO_PAUSE KMP_CPU_PAUSE()
#endif /* KMP_MIC */

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
#define OP_CMPXCHG(TYPE, BITS, OP) \
  { \
    TYPE old_value, new_value; \
    old_value = *(TYPE volatile *)lhs; \
    new_value = old_value OP rhs; \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
      KMP_DO_PAUSE; \
 \
      old_value = *(TYPE volatile *)lhs; \
      new_value = old_value OP rhs; \
    } \
  }

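// For illustration only: a hypothetical expansion of OP_CMPXCHG for
// TYPE=kmp_real32, BITS=32, OP=+. The loop re-reads *lhs and retries until
// the 32-bit compare-and-store succeeds, i.e. until no other thread changed
// *lhs between the read and the update.
#if 0
kmp_real32 old_value, new_value;
old_value = *(kmp_real32 volatile *)lhs;
new_value = old_value + rhs;
while (!KMP_COMPARE_AND_STORE_ACQ32((kmp_int32 *)lhs,
                                    *VOLATILE_CAST(kmp_int32 *) & old_value,
                                    *VOLATILE_CAST(kmp_int32 *) & new_value)) {
  KMP_DO_PAUSE; // back off before retrying
  old_value = *(kmp_real32 volatile *)lhs;
  new_value = old_value + rhs;
}
#endif
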
#if USE_CMPXCHG_FIX
// 2007-06-25:
// workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32
// and win_32e are affected (I verified the asm). Compiler ignores the volatile
// qualifier of the temp_val in the OP_CMPXCHG macro. This is a problem of the
// compiler. Related tracker is C76005, targeted to 11.0. I verified the asm of
// the workaround.
#define OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
  { \
    struct _sss { \
      TYPE cmp; \
      kmp_int##BITS *vvv; \
    }; \
    struct _sss old_value, new_value; \
    old_value.vvv = (kmp_int##BITS *)&old_value.cmp; \
    new_value.vvv = (kmp_int##BITS *)&new_value.cmp; \
    *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
    new_value.cmp = old_value.cmp OP rhs; \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \
        *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) { \
      KMP_DO_PAUSE; \
 \
      *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
      new_value.cmp = old_value.cmp OP rhs; \
    } \
  }
// end of the first part of the workaround for C78287
#endif // USE_CMPXCHG_FIX

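// For illustration only: the essence of the workaround above, written out for
// TYPE=kmp_cmplx32, BITS=64 (a hypothetical expansion). Reading *lhs through
// the integer pointer member forces a genuine 64-bit memory load that the
// affected compilers cannot optimize away, unlike the ignored volatile.
#if 0
struct _sss {
  kmp_cmplx32 cmp; // the complex value being updated
  kmp_int64 *vvv; // integer alias used to force a real 64-bit load
};
struct _sss old_value;
old_value.vvv = (kmp_int64 *)&old_value.cmp;
*old_value.vvv = *(volatile kmp_int64 *)lhs; // re-read on every iteration
#endif
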
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                         GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
  KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
  }
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                       GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  OP_CMPXCHG(TYPE, BITS, OP) \
  }
#if USE_CMPXCHG_FIX
// -------------------------------------------------------------------------
// workaround for C78287 (complex(kind=4) data type)
#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \
                                  MASK, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
  }
// end of the second part of the workaround for C78287
#endif // USE_CMPXCHG_FIX

#else
// -------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                         GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
    KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
  } else { \
    KMP_CHECK_GTID; \
    OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \
  } \
  }
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                       GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  } else { \
    KMP_CHECK_GTID; \
    OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \
  } \
  }
#if USE_CMPXCHG_FIX
// -------------------------------------------------------------------------
// workaround for C78287 (complex(kind=4) data type)
#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \
                                  MASK, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  } else { \
    KMP_CHECK_GTID; \
    OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \
  } \
  }
// end of the second part of the workaround for C78287
#endif // USE_CMPXCHG_FIX
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

// Routines for ATOMIC 4-byte operands addition and subtraction
ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3,
                 0) // __kmpc_atomic_fixed4_add
ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3,
                 0) // __kmpc_atomic_fixed4_sub

ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_add
ATOMIC_CMPXCHG(float4, sub, kmp_real32, 32, -, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_sub

// Routines for ATOMIC 8-byte operands addition and subtraction
ATOMIC_FIXED_ADD(fixed8, add, kmp_int64, 64, +, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add
ATOMIC_FIXED_ADD(fixed8, sub, kmp_int64, 64, -, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub

ATOMIC_CMPXCHG(float8, add, kmp_real64, 64, +, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_add
ATOMIC_CMPXCHG(float8, sub, kmp_real64, 64, -, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_sub

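// For illustration only: how a compiler might lower an OpenMP atomic update
// to one of the entry points above. The pragma, the variables, and the loc
// ident are a hypothetical sketch, not generated code.
#if 0
// Source:            #pragma omp atomic
//                    x += 5;            // kmp_int32 x
// Generated call:
__kmpc_atomic_fixed4_add(&loc, gtid, &x, 5);
#endif
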
// ------------------------------------------------------------------------
// Entries definition for integer operands
// TYPE_ID - operands type and size (fixed4, float4)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operand type
// BITS - size in bits, used to distinguish low level calls
// OP - operator (used in critical section)
// LCK_ID - lock identifier, used to possibly distinguish lock variable
// MASK - used for alignment check

// TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG
// ------------------------------------------------------------------------
// Routines for ATOMIC integer operands, other operators
// ------------------------------------------------------------------------
// TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_add
ATOMIC_CMPXCHG(fixed1, andb, kmp_int8, 8, &, 1i, 0,
               0) // __kmpc_atomic_fixed1_andb
ATOMIC_CMPXCHG(fixed1, div, kmp_int8, 8, /, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_div
ATOMIC_CMPXCHG(fixed1u, div, kmp_uint8, 8, /, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div
ATOMIC_CMPXCHG(fixed1, mul, kmp_int8, 8, *, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul
ATOMIC_CMPXCHG(fixed1, orb, kmp_int8, 8, |, 1i, 0,
               0) // __kmpc_atomic_fixed1_orb
ATOMIC_CMPXCHG(fixed1, shl, kmp_int8, 8, <<, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl
ATOMIC_CMPXCHG(fixed1, shr, kmp_int8, 8, >>, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr
ATOMIC_CMPXCHG(fixed1u, shr, kmp_uint8, 8, >>, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr
ATOMIC_CMPXCHG(fixed1, sub, kmp_int8, 8, -, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub
ATOMIC_CMPXCHG(fixed1, xor, kmp_int8, 8, ^, 1i, 0,
               0) // __kmpc_atomic_fixed1_xor
ATOMIC_CMPXCHG(fixed2, add, kmp_int16, 16, +, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_add
ATOMIC_CMPXCHG(fixed2, andb, kmp_int16, 16, &, 2i, 1,
               0) // __kmpc_atomic_fixed2_andb
ATOMIC_CMPXCHG(fixed2, div, kmp_int16, 16, /, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_div
ATOMIC_CMPXCHG(fixed2u, div, kmp_uint16, 16, /, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div
ATOMIC_CMPXCHG(fixed2, mul, kmp_int16, 16, *, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul
ATOMIC_CMPXCHG(fixed2, orb, kmp_int16, 16, |, 2i, 1,
               0) // __kmpc_atomic_fixed2_orb
ATOMIC_CMPXCHG(fixed2, shl, kmp_int16, 16, <<, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl
ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, >>, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr
ATOMIC_CMPXCHG(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr
ATOMIC_CMPXCHG(fixed2, sub, kmp_int16, 16, -, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub
ATOMIC_CMPXCHG(fixed2, xor, kmp_int16, 16, ^, 2i, 1,
               0) // __kmpc_atomic_fixed2_xor
ATOMIC_CMPXCHG(fixed4, andb, kmp_int32, 32, &, 4i, 3,
               0) // __kmpc_atomic_fixed4_andb
ATOMIC_CMPXCHG(fixed4, div, kmp_int32, 32, /, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_div
ATOMIC_CMPXCHG(fixed4u, div, kmp_uint32, 32, /, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div
ATOMIC_CMPXCHG(fixed4, mul, kmp_int32, 32, *, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul
ATOMIC_CMPXCHG(fixed4, orb, kmp_int32, 32, |, 4i, 3,
               0) // __kmpc_atomic_fixed4_orb
ATOMIC_CMPXCHG(fixed4, shl, kmp_int32, 32, <<, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl
ATOMIC_CMPXCHG(fixed4, shr, kmp_int32, 32, >>, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr
ATOMIC_CMPXCHG(fixed4u, shr, kmp_uint32, 32, >>, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr
ATOMIC_CMPXCHG(fixed4, xor, kmp_int32, 32, ^, 4i, 3,
               0) // __kmpc_atomic_fixed4_xor
ATOMIC_CMPXCHG(fixed8, andb, kmp_int64, 64, &, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb
ATOMIC_CMPXCHG(fixed8, div, kmp_int64, 64, /, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_div
ATOMIC_CMPXCHG(fixed8u, div, kmp_uint64, 64, /, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div
ATOMIC_CMPXCHG(fixed8, mul, kmp_int64, 64, *, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul
ATOMIC_CMPXCHG(fixed8, orb, kmp_int64, 64, |, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb
ATOMIC_CMPXCHG(fixed8, shl, kmp_int64, 64, <<, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl
ATOMIC_CMPXCHG(fixed8, shr, kmp_int64, 64, >>, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr
ATOMIC_CMPXCHG(fixed8u, shr, kmp_uint64, 64, >>, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr
ATOMIC_CMPXCHG(fixed8, xor, kmp_int64, 64, ^, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor
ATOMIC_CMPXCHG(float4, div, kmp_real32, 32, /, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_div
ATOMIC_CMPXCHG(float4, mul, kmp_real32, 32, *, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_mul
ATOMIC_CMPXCHG(float8, div, kmp_real64, 64, /, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_div
ATOMIC_CMPXCHG(float8, mul, kmp_real64, 64, *, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_mul
// TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG

/* ------------------------------------------------------------------------ */
/* Routines for C/C++ Reduction operators && and || */

// ------------------------------------------------------------------------
// Need separate macros for &&, || because there is no combined assignment
// TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
#define ATOMIC_CRIT_L(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
  OP_CRITICAL(= *lhs OP, LCK_ID) \
  }

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ===================================
#define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
  OP_CMPXCHG(TYPE, BITS, OP) \
  }

#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  } else { \
    KMP_CHECK_GTID; \
    OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */ \
  } \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0,
              KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl
ATOMIC_CMPX_L(fixed1, orl, char, 8, ||, 1i, 0,
              KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl
ATOMIC_CMPX_L(fixed2, andl, short, 16, &&, 2i, 1,
              KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl
ATOMIC_CMPX_L(fixed2, orl, short, 16, ||, 2i, 1,
              KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl
ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3,
              0) // __kmpc_atomic_fixed4_andl
ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3,
              0) // __kmpc_atomic_fixed4_orl
ATOMIC_CMPX_L(fixed8, andl, kmp_int64, 64, &&, 8i, 7,
              KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl
ATOMIC_CMPX_L(fixed8, orl, kmp_int64, 64, ||, 8i, 7,
              KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl

/* ------------------------------------------------------------------------- */
/* Routines for Fortran operators that have no counterpart in C: */
/* MAX, MIN, .EQV., .NEQV. */
/* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl} */
/* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor} */

// -------------------------------------------------------------------------
// MIN and MAX need separate macros
// OP - operator used to check whether the update is still needed
#define MIN_MAX_CRITSECT(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
 \
  if (*lhs OP rhs) { /* still need actions? */ \
    *lhs = rhs; \
  } \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

// -------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define GOMP_MIN_MAX_CRITSECT(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    MIN_MAX_CRITSECT(OP, 0); \
    return; \
  }
#else
#define GOMP_MIN_MAX_CRITSECT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// -------------------------------------------------------------------------
#define MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    TYPE old_value; \
    temp_val = *lhs; \
    old_value = temp_val; \
    while (old_value OP rhs && /* still need actions? */ \
           !KMP_COMPARE_AND_STORE_ACQ##BITS( \
               (kmp_int##BITS *)lhs, \
               *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
               *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
      KMP_CPU_PAUSE(); \
      temp_val = *lhs; \
      old_value = temp_val; \
    } \
  }

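// For illustration only: a hypothetical expansion of MIN_MAX_CMPXCHG for a
// 32-bit max (TYPE=kmp_int32, BITS=32, OP=<). The loop keeps trying while
// rhs would still enlarge *lhs and the compare-and-store keeps losing races.
#if 0
kmp_int32 KMP_ATOMIC_VOLATILE temp_val;
kmp_int32 old_value;
temp_val = *lhs;
old_value = temp_val;
while (old_value < rhs && // update still needed: rhs is larger
       !KMP_COMPARE_AND_STORE_ACQ32((kmp_int32 *)lhs,
                                    *VOLATILE_CAST(kmp_int32 *) & old_value,
                                    *VOLATILE_CAST(kmp_int32 *) & rhs)) {
  KMP_CPU_PAUSE(); // back off, then re-read and retry
  temp_val = *lhs;
  old_value = temp_val;
}
#endif
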
// -------------------------------------------------------------------------
// 1-byte, 2-byte operands - use critical section
#define MIN_MAX_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  if (*lhs OP rhs) { /* need actions? */ \
    GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
    MIN_MAX_CRITSECT(OP, LCK_ID) \
  } \
  }

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// -------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                         GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  if (*lhs OP rhs) { \
    GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
    MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
  } \
  }

#else
// -------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                         GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  if (*lhs OP rhs) { \
    GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
    if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
      MIN_MAX_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
    } else { \
      KMP_CHECK_GTID; \
      MIN_MAX_CRITSECT(OP, LCK_ID) /* unaligned address */ \
    } \
  } \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed1_max
MIN_MAX_COMPXCHG(fixed1, min, char, 8, >, 1i, 0,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed1_min
MIN_MAX_COMPXCHG(fixed2, max, short, 16, <, 2i, 1,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed2_max
MIN_MAX_COMPXCHG(fixed2, min, short, 16, >, 2i, 1,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed2_min
MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3,
                 0) // __kmpc_atomic_fixed4_max
MIN_MAX_COMPXCHG(fixed4, min, kmp_int32, 32, >, 4i, 3,
                 0) // __kmpc_atomic_fixed4_min
MIN_MAX_COMPXCHG(fixed8, max, kmp_int64, 64, <, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_max
MIN_MAX_COMPXCHG(fixed8, min, kmp_int64, 64, >, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_min
MIN_MAX_COMPXCHG(float4, max, kmp_real32, 32, <, 4r, 3,
                 KMP_ARCH_X86) // __kmpc_atomic_float4_max
MIN_MAX_COMPXCHG(float4, min, kmp_real32, 32, >, 4r, 3,
                 KMP_ARCH_X86) // __kmpc_atomic_float4_min
MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_float8_max
MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_float8_min
#if KMP_HAVE_QUAD
MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r,
                 1) // __kmpc_atomic_float16_max
MIN_MAX_CRITICAL(float16, min, QUAD_LEGACY, >, 16r,
                 1) // __kmpc_atomic_float16_min
#if (KMP_ARCH_X86)
MIN_MAX_CRITICAL(float16, max_a16, Quad_a16_t, <, 16r,
                 1) // __kmpc_atomic_float16_max_a16
MIN_MAX_CRITICAL(float16, min_a16, Quad_a16_t, >, 16r,
                 1) // __kmpc_atomic_float16_min_a16
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD
// ------------------------------------------------------------------------
// Need separate macros for .EQV. because of the need for complement (~)
// OP ignored for critical sections, ^=~ used instead
#define ATOMIC_CRIT_EQV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL(^= ~, LCK_ID) /* send assignment and complement */ \
  }

// ------------------------------------------------------------------------
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ===================================
#define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                        GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) /* send assignment */ \
  OP_CMPXCHG(TYPE, BITS, OP) \
  }
// ------------------------------------------------------------------------
#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                        GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  } else { \
    KMP_CHECK_GTID; \
    OP_CRITICAL(^= ~, LCK_ID) /* unaligned address - use critical */ \
  } \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv
ATOMIC_CMPXCHG(fixed2, neqv, kmp_int16, 16, ^, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv
ATOMIC_CMPXCHG(fixed4, neqv, kmp_int32, 32, ^, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv
ATOMIC_CMPXCHG(fixed8, neqv, kmp_int64, 64, ^, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv
ATOMIC_CMPX_EQV(fixed1, eqv, kmp_int8, 8, ^~, 1i, 0,
                KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv
ATOMIC_CMPX_EQV(fixed2, eqv, kmp_int16, 16, ^~, 2i, 1,
                KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv
ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3,
                KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv
ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7,
                KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv

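// For illustration only: why "^= ~" implements Fortran .EQV.. For any bits a
// and b, a EQV b = ~(a ^ b) = a ^ ~b, so the update *lhs = *lhs ^ ~rhs stores
// the bitwise equivalence. A small self-checking sketch (hypothetical values):
#if 0
kmp_int8 a = 0x5A, b = 0x3C;
kmp_int8 eqv1 = (kmp_int8)~(a ^ b); // direct definition of .EQV.
kmp_int8 eqv2 = (kmp_int8)(a ^ ~b); // the form used by the macros above
KMP_DEBUG_ASSERT(eqv1 == eqv2);
#endif
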
// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
// TYPE_ID, OP_ID, TYPE - detailed above
// OP - operator
// LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL(OP## =, LCK_ID) /* send assignment */ \
  }

/* ------------------------------------------------------------------------- */
// routines for long double type
ATOMIC_CRITICAL(float10, add, long double, +, 10r,
                1) // __kmpc_atomic_float10_add
ATOMIC_CRITICAL(float10, sub, long double, -, 10r,
                1) // __kmpc_atomic_float10_sub
ATOMIC_CRITICAL(float10, mul, long double, *, 10r,
                1) // __kmpc_atomic_float10_mul
ATOMIC_CRITICAL(float10, div, long double, /, 10r,
                1) // __kmpc_atomic_float10_div
#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r,
                1) // __kmpc_atomic_float16_add
ATOMIC_CRITICAL(float16, sub, QUAD_LEGACY, -, 16r,
                1) // __kmpc_atomic_float16_sub
ATOMIC_CRITICAL(float16, mul, QUAD_LEGACY, *, 16r,
                1) // __kmpc_atomic_float16_mul
ATOMIC_CRITICAL(float16, div, QUAD_LEGACY, /, 16r,
                1) // __kmpc_atomic_float16_div
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL(float16, add_a16, Quad_a16_t, +, 16r,
                1) // __kmpc_atomic_float16_add_a16
ATOMIC_CRITICAL(float16, sub_a16, Quad_a16_t, -, 16r,
                1) // __kmpc_atomic_float16_sub_a16
ATOMIC_CRITICAL(float16, mul_a16, Quad_a16_t, *, 16r,
                1) // __kmpc_atomic_float16_mul_a16
ATOMIC_CRITICAL(float16, div_a16, Quad_a16_t, /, 16r,
                1) // __kmpc_atomic_float16_div_a16
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD
// routines for complex types

#if USE_CMPXCHG_FIX
// workaround for C78287 (complex(kind=4) data type)
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, add, kmp_cmplx32, 64, +, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_add
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_sub
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_mul
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, div, kmp_cmplx32, 64, /, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_div
// end of the workaround for C78287
#else
ATOMIC_CRITICAL(cmplx4, add, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add
ATOMIC_CRITICAL(cmplx4, sub, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub
ATOMIC_CRITICAL(cmplx4, mul, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul
ATOMIC_CRITICAL(cmplx4, div, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div
#endif // USE_CMPXCHG_FIX

ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add
ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub
ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul
ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div
ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c,
                1) // __kmpc_atomic_cmplx10_add
ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c,
                1) // __kmpc_atomic_cmplx10_sub
ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c,
                1) // __kmpc_atomic_cmplx10_mul
ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c,
                1) // __kmpc_atomic_cmplx10_div
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c,
                1) // __kmpc_atomic_cmplx16_add
ATOMIC_CRITICAL(cmplx16, sub, CPLX128_LEG, -, 32c,
                1) // __kmpc_atomic_cmplx16_sub
ATOMIC_CRITICAL(cmplx16, mul, CPLX128_LEG, *, 32c,
                1) // __kmpc_atomic_cmplx16_mul
ATOMIC_CRITICAL(cmplx16, div, CPLX128_LEG, /, 32c,
                1) // __kmpc_atomic_cmplx16_div
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL(cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c,
                1) // __kmpc_atomic_cmplx16_add_a16
ATOMIC_CRITICAL(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
                1) // __kmpc_atomic_cmplx16_sub_a16
ATOMIC_CRITICAL(cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c,
                1) // __kmpc_atomic_cmplx16_mul_a16
ATOMIC_CRITICAL(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
                1) // __kmpc_atomic_cmplx16_div_a16
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// OpenMP 4.0: x = expr binop x for non-commutative operations.
// Supported only on IA-32 architecture and Intel(R) 64
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL_REV(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
 \
  (*lhs) = (rhs)OP(*lhs); \
 \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_REV(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_REV(OP, 0); \
    return; \
  }
#else
#define OP_GOMP_CRITICAL_REV(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
//           fixed)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
#define ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev(ident_t *id_ref, int gtid, \
                                                   TYPE *lhs, TYPE rhs) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid));

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
// Note: temp_val introduced in order to force the compiler to read
//       *lhs only once (w/o it the compiler reads *lhs twice)
#define OP_CMPXCHG_REV(TYPE, BITS, OP) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    TYPE old_value, new_value; \
    temp_val = *lhs; \
    old_value = temp_val; \
    new_value = rhs OP old_value; \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
      KMP_DO_PAUSE; \
 \
      temp_val = *lhs; \
      old_value = temp_val; \
      new_value = rhs OP old_value; \
    } \
  }

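// For illustration only: the reversed operand order is what matters for the
// non-commutative operations handled here. In a hypothetical expansion for
// __kmpc_atomic_float8_sub_rev (TYPE=kmp_real64, BITS=64, OP=-), the new
// value is computed as rhs - old_value, i.e. *lhs = rhs - *lhs, whereas the
// forward OP_CMPXCHG would compute old_value - rhs.
#if 0
kmp_real64 KMP_ATOMIC_VOLATILE temp_val;
kmp_real64 old_value, new_value;
temp_val = *lhs;
old_value = temp_val;
new_value = rhs - old_value; // reversed: expr binop x
#endif
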
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \
  OP_CMPXCHG_REV(TYPE, BITS, OP) \
  }

// ------------------------------------------------------------------------
// Entries definition for integer operands
// TYPE_ID - operands type and size (fixed4, float4)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operand type
// BITS - size in bits, used to distinguish low level calls
// OP - operator (used in critical section)
// LCK_ID - lock identifier, used to possibly distinguish lock variable

// TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,GOMP_FLAG
// ------------------------------------------------------------------------
// Routines for ATOMIC integer operands, other operators
// ------------------------------------------------------------------------
// TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG
ATOMIC_CMPXCHG_REV(fixed1, div, kmp_int8, 8, /, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev
ATOMIC_CMPXCHG_REV(fixed1u, div, kmp_uint8, 8, /, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev
ATOMIC_CMPXCHG_REV(fixed1, shl, kmp_int8, 8, <<, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_rev
ATOMIC_CMPXCHG_REV(fixed1, shr, kmp_int8, 8, >>, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_rev
ATOMIC_CMPXCHG_REV(fixed1u, shr, kmp_uint8, 8, >>, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_rev
ATOMIC_CMPXCHG_REV(fixed1, sub, kmp_int8, 8, -, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev

ATOMIC_CMPXCHG_REV(fixed2, div, kmp_int16, 16, /, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev
ATOMIC_CMPXCHG_REV(fixed2u, div, kmp_uint16, 16, /, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev
ATOMIC_CMPXCHG_REV(fixed2, shl, kmp_int16, 16, <<, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_rev
ATOMIC_CMPXCHG_REV(fixed2, shr, kmp_int16, 16, >>, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_rev
ATOMIC_CMPXCHG_REV(fixed2u, shr, kmp_uint16, 16, >>, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_rev
ATOMIC_CMPXCHG_REV(fixed2, sub, kmp_int16, 16, -, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev

ATOMIC_CMPXCHG_REV(fixed4, div, kmp_int32, 32, /, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_rev
ATOMIC_CMPXCHG_REV(fixed4u, div, kmp_uint32, 32, /, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_rev
ATOMIC_CMPXCHG_REV(fixed4, shl, kmp_int32, 32, <<, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_rev
ATOMIC_CMPXCHG_REV(fixed4, shr, kmp_int32, 32, >>, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_rev
ATOMIC_CMPXCHG_REV(fixed4u, shr, kmp_uint32, 32, >>, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_rev
ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_rev

ATOMIC_CMPXCHG_REV(fixed8, div, kmp_int64, 64, /, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev
ATOMIC_CMPXCHG_REV(fixed8u, div, kmp_uint64, 64, /, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev
ATOMIC_CMPXCHG_REV(fixed8, shl, kmp_int64, 64, <<, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_rev
ATOMIC_CMPXCHG_REV(fixed8, shr, kmp_int64, 64, >>, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_rev
ATOMIC_CMPXCHG_REV(fixed8u, shr, kmp_uint64, 64, >>, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_rev
ATOMIC_CMPXCHG_REV(fixed8, sub, kmp_int64, 64, -, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev

ATOMIC_CMPXCHG_REV(float4, div, kmp_real32, 32, /, 4r,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev
ATOMIC_CMPXCHG_REV(float4, sub, kmp_real32, 32, -, 4r,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev

ATOMIC_CMPXCHG_REV(float8, div, kmp_real64, 64, /, 8r,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev
ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev
// TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID, GOMP_FLAG

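// For illustration only: a hypothetical lowering of a reverse-operand atomic.
// The pragma, the variables, and the loc ident are an assumed example, not
// generated code.
#if 0
// Source:            #pragma omp atomic
//                    x = 10.0 - x;      // kmp_real64 x
// Generated call:
__kmpc_atomic_float8_sub_rev(&loc, gtid, &x, 10.0);
#endif
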
// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
// TYPE_ID, OP_ID, TYPE - detailed above
// OP - operator
// LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \
  OP_CRITICAL_REV(OP, LCK_ID) \
  }

/* ------------------------------------------------------------------------- */
// routines for long double type
ATOMIC_CRITICAL_REV(float10, sub, long double, -, 10r,
                    1) // __kmpc_atomic_float10_sub_rev
ATOMIC_CRITICAL_REV(float10, div, long double, /, 10r,
                    1) // __kmpc_atomic_float10_div_rev
#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL_REV(float16, sub, QUAD_LEGACY, -, 16r,
                    1) // __kmpc_atomic_float16_sub_rev
ATOMIC_CRITICAL_REV(float16, div, QUAD_LEGACY, /, 16r,
                    1) // __kmpc_atomic_float16_div_rev
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_REV(float16, sub_a16, Quad_a16_t, -, 16r,
                    1) // __kmpc_atomic_float16_sub_a16_rev
ATOMIC_CRITICAL_REV(float16, div_a16, Quad_a16_t, /, 16r,
                    1) // __kmpc_atomic_float16_div_a16_rev
#endif // KMP_ARCH_X86
#endif // KMP_HAVE_QUAD

// routines for complex types
ATOMIC_CRITICAL_REV(cmplx4, sub, kmp_cmplx32, -, 8c,
                    1) // __kmpc_atomic_cmplx4_sub_rev
ATOMIC_CRITICAL_REV(cmplx4, div, kmp_cmplx32, /, 8c,
                    1) // __kmpc_atomic_cmplx4_div_rev
ATOMIC_CRITICAL_REV(cmplx8, sub, kmp_cmplx64, -, 16c,
                    1) // __kmpc_atomic_cmplx8_sub_rev
ATOMIC_CRITICAL_REV(cmplx8, div, kmp_cmplx64, /, 16c,
                    1) // __kmpc_atomic_cmplx8_div_rev
ATOMIC_CRITICAL_REV(cmplx10, sub, kmp_cmplx80, -, 20c,
                    1) // __kmpc_atomic_cmplx10_sub_rev
ATOMIC_CRITICAL_REV(cmplx10, div, kmp_cmplx80, /, 20c,
                    1) // __kmpc_atomic_cmplx10_div_rev
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_REV(cmplx16, sub, CPLX128_LEG, -, 32c,
                    1) // __kmpc_atomic_cmplx16_sub_rev
ATOMIC_CRITICAL_REV(cmplx16, div, CPLX128_LEG, /, 32c,
                    1) // __kmpc_atomic_cmplx16_div_rev
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_REV(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
                    1) // __kmpc_atomic_cmplx16_sub_a16_rev
ATOMIC_CRITICAL_REV(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
                    1) // __kmpc_atomic_cmplx16_div_a16_rev
#endif // KMP_ARCH_X86
#endif // KMP_HAVE_QUAD

#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
// End of OpenMP 4.0: x = expr binop x for non-commutative operations.

/* ------------------------------------------------------------------------ */
/* Routines for mixed types of LHS and RHS, when RHS is "larger". */
/* Note: in order to reduce the total number of type combinations, */
/* it is assumed that the compiler converts RHS to the longest floating */
/* type, that is _Quad, before the call to any of these routines. */
/* Conversion to _Quad will be done by the compiler during calculation, */
/* conversion back to TYPE - before the assignment, like: */
/*     *lhs = (TYPE)( (_Quad)(*lhs) OP rhs ) */
/* Performance penalty expected because of SW emulation use. */
/* ------------------------------------------------------------------------ */

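// For illustration only: the conversion contract described above, written as
// a hypothetical expansion for __kmpc_atomic_float4_add_fp (TYPE=kmp_real32,
// RTYPE=_Quad). The computation is done in _Quad precision and narrowed back
// to the LHS type on assignment.
#if 0
kmp_real32 old_value = *lhs;
kmp_real32 new_value = (kmp_real32)((_Quad)old_value + rhs); // rhs is _Quad
#endif
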
#define ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
      ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, \
             ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
              gtid));

// -------------------------------------------------------------------------
#define ATOMIC_CRITICAL_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID, \
                           GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL(OP## =, LCK_ID) /* send assignment */ \
  }

// -------------------------------------------------------------------------
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// -------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
                           LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  OP_CMPXCHG(TYPE, BITS, OP) \
  }
// -------------------------------------------------------------------------
#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
                           LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  } else { \
    KMP_CHECK_GTID; \
    OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \
  } \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

// -------------------------------------------------------------------------
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
                               RTYPE, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \
  OP_CMPXCHG_REV(TYPE, BITS, OP) \
  }
#define ATOMIC_CRITICAL_REV_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
                               LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \
  OP_CRITICAL_REV(OP, LCK_ID) \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

// RHS=float8
ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_float8
ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_float8
ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_float8
ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_float8
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3,
                   0) // __kmpc_atomic_fixed4_mul_float8
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3,
                   0) // __kmpc_atomic_fixed4_div_float8
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_float8
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_float8
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_add_float8
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_sub_float8
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_mul_float8
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_div_float8

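// For illustration only: a hypothetical mixed-type lowering. The pragma and
// the variables are an assumed example, not generated code.
#if 0
// Source:            #pragma omp atomic
//                    f *= d;            // kmp_real32 f; kmp_real64 d;
// Generated call (RHS stays kmp_real64, LHS is updated as kmp_real32):
__kmpc_atomic_float4_mul_float8(&loc, gtid, &f, d);
#endif
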
// RHS=float16 (deprecated, to be removed when we are sure the compiler does
// not use them)
#if KMP_HAVE_QUAD
ATOMIC_CMPXCHG_MIX(fixed1, char, add, 8, +, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_fp
ATOMIC_CMPXCHG_MIX(fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_fp
ATOMIC_CMPXCHG_MIX(fixed1, char, sub, 8, -, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_fp
ATOMIC_CMPXCHG_MIX(fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_fp
ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_fp
ATOMIC_CMPXCHG_MIX(fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_fp
ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_fp
ATOMIC_CMPXCHG_MIX(fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_fp

ATOMIC_CMPXCHG_MIX(fixed2, short, add, 16, +, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_fp
ATOMIC_CMPXCHG_MIX(fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_fp
ATOMIC_CMPXCHG_MIX(fixed2, short, sub, 16, -, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_fp
ATOMIC_CMPXCHG_MIX(fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_fp
ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_fp
ATOMIC_CMPXCHG_MIX(fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_fp
ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_fp
ATOMIC_CMPXCHG_MIX(fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_fp

ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4_add_fp
ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4u_add_fp
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4_sub_fp
ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4u_sub_fp
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4_mul_fp
ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4u_mul_fp
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4_div_fp
ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4u_div_fp

ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_fp
ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_fp
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_fp
ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_fp
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_fp
ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_fp
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_fp
ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_fp

ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_add_fp
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_sub_fp
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_mul_fp
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_div_fp

ATOMIC_CMPXCHG_MIX(float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_add_fp
ATOMIC_CMPXCHG_MIX(float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_sub_fp
ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_mul_fp
ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp

ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r,
                   1) // __kmpc_atomic_float10_add_fp
ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r,
                   1) // __kmpc_atomic_float10_sub_fp
ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r,
                   1) // __kmpc_atomic_float10_mul_fp
ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r,
                   1) // __kmpc_atomic_float10_div_fp

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// Reverse operations
ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_div_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev_fp

ATOMIC_CRITICAL_REV_FP(float10, long double, sub_rev, -, fp, _Quad, 10r,
                       1) // __kmpc_atomic_float10_sub_rev_fp
ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r,
                       1) // __kmpc_atomic_float10_div_rev_fp
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

#endif // KMP_HAVE_QUAD

1807#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1808// ------------------------------------------------------------------------
1809// X86 or X86_64: no alignment problems ====================================
1810#if USE_CMPXCHG_FIX
1811// workaround for C78287 (complex(kind=4) data type)
1812#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1813 LCK_ID, MASK, GOMP_FLAG) \
1814 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1815 OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
1816 OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
1817 }
1818// end of the second part of the workaround for C78287
1819#else
1820#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1821 LCK_ID, MASK, GOMP_FLAG) \
1822 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1823 OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
1824 OP_CMPXCHG(TYPE, BITS, OP) \
1825 }
1826#endif // USE_CMPXCHG_FIX
1827#else
1828// ------------------------------------------------------------------------
1829// Code for other architectures that don't handle unaligned accesses.
1830#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1831 LCK_ID, MASK, GOMP_FLAG) \
1832 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1833 OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
1834 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1835 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1836 } else { \
1837 KMP_CHECK_GTID; \
1838 OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \
1839 } \
1840 }
1841#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
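// ------------------------------------------------------------------------
// Illustration (sketch only, not compiled): how the MASK dispatch above
// behaves for the cmplx4 entries below, where BITS=64 and MASK=7. On
// architectures that don't handle unaligned accesses, an 8-byte-aligned lhs
// takes the lock-free path and anything else is serialized on the 8c lock.
#if 0
if (!((kmp_uintptr_t)lhs & 0x7)) {
  // lhs is 8-byte aligned: 64-bit compare-and-swap loop on the whole cmplx4
} else {
  // unaligned: fall back to the critical section guarded by
  // __kmp_atomic_lock_8c
}
#endif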
1842
1843ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c,
1844 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8
1845ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c,
1846 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_sub_cmplx8
1847ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c,
1848 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_mul_cmplx8
1849ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c,
1850 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8
1851
1852// READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
1853#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1854
1855// ------------------------------------------------------------------------
1856// Atomic READ routines
1857
1858// ------------------------------------------------------------------------
1859// Beginning of a definition (provides name, parameters, debug trace)
1860// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
1861// fixed)
1862// OP_ID - operation identifier (add, sub, mul, ...)
1863// TYPE - operands' type
1864#define ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
1865 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
1866 TYPE *loc) { \
1867 KMP_DEBUG_ASSERT(__kmp_init_serial); \
1868 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
1869
1870// ------------------------------------------------------------------------
1871// Operation on *loc using "compare_and_store_ret" routine
1872// TYPE - operands' type
1873// BITS - size in bits, used to distinguish low level calls
1874// OP - operator
1875// Note: temp_val introduced in order to force the compiler to read
1876// *loc only once (w/o it the compiler reads *loc twice)
1877// TODO: check if it is still necessary
1878// Return old value regardless of the result of "compare & swap" operation
1879#define OP_CMPXCHG_READ(TYPE, BITS, OP) \
1880 { \
1881 TYPE KMP_ATOMIC_VOLATILE temp_val; \
1882 union f_i_union { \
1883 TYPE f_val; \
1884 kmp_int##BITS i_val; \
1885 }; \
1886 union f_i_union old_value; \
1887 temp_val = *loc; \
1888 old_value.f_val = temp_val; \
1889 old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( \
1890 (kmp_int##BITS *)loc, \
1891 *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val, \
1892 *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val); \
1893 new_value = old_value.f_val; \
1894 return new_value; \
1895 }
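// Illustration (sketch only, not a verbatim expansion): the read above for
// TYPE=kmp_real64, BITS=64 (as used by __kmpc_atomic_float8_rd below).
// Passing old_value as both the expected and the new value turns the
// compare-and-store into a pure atomic load: *loc is never modified, and
// the primitive returns whatever value it actually observed.
#if 0
union f_i_union {
  kmp_real64 f_val;
  kmp_int64 i_val;
};
union f_i_union old_value;
old_value.f_val = *loc; // initial guess; may already be stale
old_value.i_val = KMP_COMPARE_AND_STORE_RET64(
    (kmp_int64 *)loc, old_value.i_val, old_value.i_val); // store is a no-op
return old_value.f_val;
#endif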
1896
1897// -------------------------------------------------------------------------
1898// Operation on *lhs, rhs bound by critical section
1899// OP - operator (it's supposed to contain an assignment)
1900// LCK_ID - lock identifier
1901// Note: don't check gtid as it should always be valid
1902// 1, 2-byte - expect valid parameter, other - check before this macro
1903#define OP_CRITICAL_READ(OP, LCK_ID) \
1904 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1905 \
1906 new_value = (*loc); \
1907 \
1908 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1909
1910// -------------------------------------------------------------------------
1911#ifdef KMP_GOMP_COMPAT
1912#define OP_GOMP_CRITICAL_READ(OP, FLAG) \
1913 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1914 KMP_CHECK_GTID; \
1915 OP_CRITICAL_READ(OP, 0); \
1916 return new_value; \
1917 }
1918#else
1919#define OP_GOMP_CRITICAL_READ(OP, FLAG)
1920#endif /* KMP_GOMP_COMPAT */
1921
1922// -------------------------------------------------------------------------
1923#define ATOMIC_FIXED_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
1924 ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
1925 TYPE new_value; \
1926 OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
1927 new_value = KMP_TEST_THEN_ADD##BITS(loc, OP 0); \
1928 return new_value; \
1929 }
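// Note (sketch only): for native integers the atomic read is done as a
// fetch-and-add of zero, which returns the current value without changing
// it; e.g. __kmpc_atomic_fixed4_rd effectively performs:
#if 0
kmp_int32 new_value = KMP_TEST_THEN_ADD32(loc, +0); // atomic load via xadd
return new_value;
#endif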
1930// -------------------------------------------------------------------------
1931#define ATOMIC_CMPXCHG_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
1932 ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
1933 TYPE new_value; \
1934 OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
1935 OP_CMPXCHG_READ(TYPE, BITS, OP) \
1936 }
1937// ------------------------------------------------------------------------
1938// Routines for Extended types: long double, _Quad, complex flavours (use
1939// critical section)
1940// TYPE_ID, OP_ID, TYPE - detailed above
1941// OP - operator
1942// LCK_ID - lock identifier, used to possibly distinguish lock variable
1943#define ATOMIC_CRITICAL_READ(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1944 ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
1945 TYPE new_value; \
1946 OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) /* send assignment */ \
1947 OP_CRITICAL_READ(OP, LCK_ID) /* send assignment */ \
1948 return new_value; \
1949 }
1950
1951// ------------------------------------------------------------------------
1952// Fix for cmplx4 read (CQ220361) on Windows* OS. Regular routine with return
1953// value doesn't work.
1954// Let's return the read value through the additional parameter.
1955#if (KMP_OS_WINDOWS)
1956
1957#define OP_CRITICAL_READ_WRK(OP, LCK_ID) \
1958 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1959 \
1960 (*out) = (*loc); \
1961 \
1962 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1963// ------------------------------------------------------------------------
1964#ifdef KMP_GOMP_COMPAT
1965#define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG) \
1966 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1967 KMP_CHECK_GTID; \
1968 OP_CRITICAL_READ_WRK(OP, 0); \
1969 }
1970#else
1971#define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)
1972#endif /* KMP_GOMP_COMPAT */
1973// ------------------------------------------------------------------------
1974#define ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
1975 void __kmpc_atomic_##TYPE_ID##_##OP_ID(TYPE *out, ident_t *id_ref, int gtid, \
1976 TYPE *loc) { \
1977 KMP_DEBUG_ASSERT(__kmp_init_serial); \
1978 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
1979
1980// ------------------------------------------------------------------------
1981#define ATOMIC_CRITICAL_READ_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1982 ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
1983 OP_GOMP_CRITICAL_READ_WRK(OP## =, GOMP_FLAG) /* send assignment */ \
1984 OP_CRITICAL_READ_WRK(OP, LCK_ID) /* send assignment */ \
1985 }
1986
1987#endif // KMP_OS_WINDOWS
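// Note (sketch only): with the workaround enabled, the cmplx4 read below is
// generated with this shape; the value travels through the extra "out"
// parameter instead of the return value (see CQ220361 above):
#if 0
void __kmpc_atomic_cmplx4_rd(kmp_cmplx32 *out, ident_t *id_ref, int gtid,
                             kmp_cmplx32 *loc); // *out = *loc under 8c lock
#endif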
1988
1989// ------------------------------------------------------------------------
1990// TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
1991ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_rd
1992ATOMIC_FIXED_READ(fixed8, rd, kmp_int64, 64, +,
1993 KMP_ARCH_X86) // __kmpc_atomic_fixed8_rd
1994ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +,
1995 KMP_ARCH_X86) // __kmpc_atomic_float4_rd
1996ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +,
1997 KMP_ARCH_X86) // __kmpc_atomic_float8_rd
1998
1999// !!! TODO: Remove lock operations for "char" since it can't be non-atomic
2000ATOMIC_CMPXCHG_READ(fixed1, rd, kmp_int8, 8, +,
2001 KMP_ARCH_X86) // __kmpc_atomic_fixed1_rd
2002ATOMIC_CMPXCHG_READ(fixed2, rd, kmp_int16, 16, +,
2003 KMP_ARCH_X86) // __kmpc_atomic_fixed2_rd
2004
2005ATOMIC_CRITICAL_READ(float10, rd, long double, +, 10r,
2006 1) // __kmpc_atomic_float10_rd
2007#if KMP_HAVE_QUAD
2008ATOMIC_CRITICAL_READ(float16, rd, QUAD_LEGACY, +, 16r,
2009 1) // __kmpc_atomic_float16_rd
2010#endif // KMP_HAVE_QUAD
2011
2012// Fix for CQ220361 on Windows* OS
2013#if (KMP_OS_WINDOWS)
2014ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c,
2015 1) // __kmpc_atomic_cmplx4_rd
2016#else
2017ATOMIC_CRITICAL_READ(cmplx4, rd, kmp_cmplx32, +, 8c,
2018 1) // __kmpc_atomic_cmplx4_rd
2019#endif // (KMP_OS_WINDOWS)
2020ATOMIC_CRITICAL_READ(cmplx8, rd, kmp_cmplx64, +, 16c,
2021 1) // __kmpc_atomic_cmplx8_rd
2022ATOMIC_CRITICAL_READ(cmplx10, rd, kmp_cmplx80, +, 20c,
2023 1) // __kmpc_atomic_cmplx10_rd
2024#if KMP_HAVE_QUAD
2025ATOMIC_CRITICAL_READ(cmplx16, rd, CPLX128_LEG, +, 32c,
2026 1) // __kmpc_atomic_cmplx16_rd
2027#if (KMP_ARCH_X86)
2028ATOMIC_CRITICAL_READ(float16, a16_rd, Quad_a16_t, +, 16r,
2029 1) // __kmpc_atomic_float16_a16_rd
2030ATOMIC_CRITICAL_READ(cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c,
2031 1) // __kmpc_atomic_cmplx16_a16_rd
2032#endif // (KMP_ARCH_X86)
2033#endif // KMP_HAVE_QUAD
2034
2035// ------------------------------------------------------------------------
2036// Atomic WRITE routines
2037
2038#define ATOMIC_XCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2039 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2040 OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2041 KMP_XCHG_FIXED##BITS(lhs, rhs); \
2042 }
2043// ------------------------------------------------------------------------
2044#define ATOMIC_XCHG_FLOAT_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2045 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2046 OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2047 KMP_XCHG_REAL##BITS(lhs, rhs); \
2048 }
2049
2050// ------------------------------------------------------------------------
2051// Operation on *lhs, rhs using "compare_and_store" routine
2052// TYPE - operands' type
2053// BITS - size in bits, used to distinguish low level calls
2054// OP - operator
2055// Note: temp_val introduced in order to force the compiler to read
2056// *lhs only once (w/o it the compiler reads *lhs twice)
2057#define OP_CMPXCHG_WR(TYPE, BITS, OP) \
2058 { \
2059 TYPE KMP_ATOMIC_VOLATILE temp_val; \
2060 TYPE old_value, new_value; \
2061 temp_val = *lhs; \
2062 old_value = temp_val; \
2063 new_value = rhs; \
2064 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2065 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2066 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2067 KMP_CPU_PAUSE(); \
2068 \
2069 temp_val = *lhs; \
2070 old_value = temp_val; \
2071 new_value = rhs; \
2072 } \
2073 }
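// Illustration (sketch only, modulo the temp copies the macro makes): the
// write above is a plain store expressed as a CAS loop; it retries only if
// another thread changed *lhs between the read of old_value and the
// compare-and-store, and rhs never changes across iterations. Rough
// expansion for TYPE=kmp_real64, BITS=64:
#if 0
kmp_real64 old_value = *lhs; // snapshot of the current value
while (!KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)lhs,
                                    *(kmp_int64 *)&old_value,
                                    *(kmp_int64 *)&rhs)) {
  KMP_CPU_PAUSE(); // back off, re-read, retry
  old_value = *lhs;
}
#endif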
2074
2075// -------------------------------------------------------------------------
2076#define ATOMIC_CMPXCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2077 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2078 OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2079 OP_CMPXCHG_WR(TYPE, BITS, OP) \
2080 }
2081
2082// ------------------------------------------------------------------------
2083// Routines for Extended types: long double, _Quad, complex flavours (use
2084// critical section)
2085// TYPE_ID, OP_ID, TYPE - detailed above
2086// OP - operator
2087// LCK_ID - lock identifier, used to possibly distinguish lock variable
2088#define ATOMIC_CRITICAL_WR(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2089 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2090 OP_GOMP_CRITICAL(OP, GOMP_FLAG) /* send assignment */ \
2091 OP_CRITICAL(OP, LCK_ID) /* send assignment */ \
2092 }
2093// -------------------------------------------------------------------------
2094
2095ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =,
2096 KMP_ARCH_X86) // __kmpc_atomic_fixed1_wr
2097ATOMIC_XCHG_WR(fixed2, wr, kmp_int16, 16, =,
2098 KMP_ARCH_X86) // __kmpc_atomic_fixed2_wr
2099ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =,
2100 KMP_ARCH_X86) // __kmpc_atomic_fixed4_wr
2101#if (KMP_ARCH_X86)
2102ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =,
2103 KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2104#else
2105ATOMIC_XCHG_WR(fixed8, wr, kmp_int64, 64, =,
2106 KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2107#endif // (KMP_ARCH_X86)
2108
2109ATOMIC_XCHG_FLOAT_WR(float4, wr, kmp_real32, 32, =,
2110 KMP_ARCH_X86) // __kmpc_atomic_float4_wr
2111#if (KMP_ARCH_X86)
2112ATOMIC_CMPXCHG_WR(float8, wr, kmp_real64, 64, =,
2113 KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2114#else
2115ATOMIC_XCHG_FLOAT_WR(float8, wr, kmp_real64, 64, =,
2116 KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2117#endif // (KMP_ARCH_X86)
2118
2119ATOMIC_CRITICAL_WR(float10, wr, long double, =, 10r,
2120 1) // __kmpc_atomic_float10_wr
2121#if KMP_HAVE_QUAD
2122ATOMIC_CRITICAL_WR(float16, wr, QUAD_LEGACY, =, 16r,
2123 1) // __kmpc_atomic_float16_wr
2124#endif // KMP_HAVE_QUAD
2125ATOMIC_CRITICAL_WR(cmplx4, wr, kmp_cmplx32, =, 8c, 1) // __kmpc_atomic_cmplx4_wr
2126ATOMIC_CRITICAL_WR(cmplx8, wr, kmp_cmplx64, =, 16c,
2127 1) // __kmpc_atomic_cmplx8_wr
2128ATOMIC_CRITICAL_WR(cmplx10, wr, kmp_cmplx80, =, 20c,
2129 1) // __kmpc_atomic_cmplx10_wr
2130#if KMP_HAVE_QUAD
2131ATOMIC_CRITICAL_WR(cmplx16, wr, CPLX128_LEG, =, 32c,
2132 1) // __kmpc_atomic_cmplx16_wr
2133#if (KMP_ARCH_X86)
2134ATOMIC_CRITICAL_WR(float16, a16_wr, Quad_a16_t, =, 16r,
2135 1) // __kmpc_atomic_float16_a16_wr
2136ATOMIC_CRITICAL_WR(cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c,
2137 1) // __kmpc_atomic_cmplx16_a16_wr
2138#endif // (KMP_ARCH_X86)
2139#endif // KMP_HAVE_QUAD
2140
2141// ------------------------------------------------------------------------
2142// Atomic CAPTURE routines
2143
2144// Beginning of a definition (provides name, parameters, debug trace)
2145// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
2146// fixed)
2147// OP_ID - operation identifier (add, sub, mul, ...)
2148// TYPE - operands' type
2149#define ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
2150 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
2151 TYPE *lhs, TYPE rhs, int flag) { \
2152 KMP_DEBUG_ASSERT(__kmp_init_serial); \
2153 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2154
2155// -------------------------------------------------------------------------
2156// Operation on *lhs, rhs bound by critical section
2157// OP - operator (it's supposed to contain an assignment)
2158// LCK_ID - lock identifier
2159// Note: don't check gtid as it should always be valid
2160// 1, 2-byte - expect valid parameter, other - check before this macro
2161#define OP_CRITICAL_CPT(OP, LCK_ID) \
2162 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2163 \
2164 if (flag) { \
2165 (*lhs) OP rhs; \
2166 new_value = (*lhs); \
2167 } else { \
2168 new_value = (*lhs); \
2169 (*lhs) OP rhs; \
2170 } \
2171 \
2172 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2173 return new_value;
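// Note (sketch only): "flag" selects which snapshot the caller captures. In
// OpenMP terms the two branches correspond to { x op= rhs; v = x; } when
// flag != 0 and { v = x; x op= rhs; } when flag == 0; with OP spliced in as
// "+=" the body above reads:
#if 0
if (flag) {
  (*lhs) += rhs;
  new_value = (*lhs); // capture the updated value
} else {
  new_value = (*lhs); // capture the prior value
  (*lhs) += rhs;
}
#endif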
2174
2175// ------------------------------------------------------------------------
2176#ifdef KMP_GOMP_COMPAT
2177#define OP_GOMP_CRITICAL_CPT(OP, FLAG) \
2178 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2179 KMP_CHECK_GTID; \
2180 OP_CRITICAL_CPT(OP## =, 0); \
2181 }
2182#else
2183#define OP_GOMP_CRITICAL_CPT(OP, FLAG)
2184#endif /* KMP_GOMP_COMPAT */
2185
2186// ------------------------------------------------------------------------
2187// Operation on *lhs, rhs using "compare_and_store" routine
2188// TYPE - operands' type
2189// BITS - size in bits, used to distinguish low level calls
2190// OP - operator
2191// Note: temp_val introduced in order to force the compiler to read
2192// *lhs only once (w/o it the compiler reads *lhs twice)
2193#define OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2194 { \
2195 TYPE KMP_ATOMIC_VOLATILE temp_val; \
2196 TYPE old_value, new_value; \
2197 temp_val = *lhs; \
2198 old_value = temp_val; \
2199 new_value = old_value OP rhs; \
2200 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2201 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2202 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2203 KMP_CPU_PAUSE(); \
2204 \
2205 temp_val = *lhs; \
2206 old_value = temp_val; \
2207 new_value = old_value OP rhs; \
2208 } \
2209 if (flag) { \
2210 return new_value; \
2211 } else \
2212 return old_value; \
2213 }
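// Illustration (sketch only): rough expansion of the loop above for
// TYPE=kmp_real32, BITS=32, OP=+ (the body of __kmpc_atomic_float4_add_cpt
// below). new_value is recomputed from a fresh old_value until the CAS
// lands, then "flag" picks which of the two values to return.
#if 0
kmp_real32 old_value = *lhs;
kmp_real32 new_value = old_value + rhs;
while (!KMP_COMPARE_AND_STORE_ACQ32((kmp_int32 *)lhs,
                                    *(kmp_int32 *)&old_value,
                                    *(kmp_int32 *)&new_value)) {
  KMP_CPU_PAUSE();
  old_value = *lhs;
  new_value = old_value + rhs;
}
return flag ? new_value : old_value;
#endif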
2214
2215// -------------------------------------------------------------------------
2216#define ATOMIC_CMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2217 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2218 TYPE new_value; \
2219 OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) \
2220 OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2221 }
2222
2223// -------------------------------------------------------------------------
2224#define ATOMIC_FIXED_ADD_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2225 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2226 TYPE old_value, new_value; \
2227 OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) \
2228 /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
2229 old_value = KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
2230 if (flag) { \
2231 return old_value OP rhs; \
2232 } else \
2233 return old_value; \
2234 }
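// Note (sketch only): OP is spliced in as a sign here, so sub_cpt reuses the
// same xadd primitive by adding a negated rhs. For fixed4 sub_cpt the body
// above effectively becomes:
#if 0
kmp_int32 old_value = KMP_TEST_THEN_ADD32(lhs, -rhs); // atomic *lhs -= rhs
return flag ? old_value - rhs : old_value; // post-op vs pre-op capture
#endif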
2235// -------------------------------------------------------------------------
2236
2237ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +,
2238 0) // __kmpc_atomic_fixed4_add_cpt
2239ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -,
2240 0) // __kmpc_atomic_fixed4_sub_cpt
2241ATOMIC_FIXED_ADD_CPT(fixed8, add_cpt, kmp_int64, 64, +,
2242 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt
2243ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -,
2244 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt
2245
2246ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +,
2247 KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt
2248ATOMIC_CMPXCHG_CPT(float4, sub_cpt, kmp_real32, 32, -,
2249 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt
2250ATOMIC_CMPXCHG_CPT(float8, add_cpt, kmp_real64, 64, +,
2251 KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt
2252ATOMIC_CMPXCHG_CPT(float8, sub_cpt, kmp_real64, 64, -,
2253 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt
2254
2255// ------------------------------------------------------------------------
2256// Entry definitions for integer operands
2257// TYPE_ID - operands type and size (fixed4, float4)
2258// OP_ID - operation identifier (add, sub, mul, ...)
2259// TYPE - operand type
2260// BITS - size in bits, used to distinguish low level calls
2261// OP - operator (used in critical section)
2262// TYPE_ID,OP_ID, TYPE, BITS,OP,GOMP_FLAG
2263// ------------------------------------------------------------------------
2264// Routines for ATOMIC integer operands, other operators
2265// ------------------------------------------------------------------------
2266// TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2267ATOMIC_CMPXCHG_CPT(fixed1, add_cpt, kmp_int8, 8, +,
2268 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt
2269ATOMIC_CMPXCHG_CPT(fixed1, andb_cpt, kmp_int8, 8, &,
2270 0) // __kmpc_atomic_fixed1_andb_cpt
2271ATOMIC_CMPXCHG_CPT(fixed1, div_cpt, kmp_int8, 8, /,
2272 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt
2273ATOMIC_CMPXCHG_CPT(fixed1u, div_cpt, kmp_uint8, 8, /,
2274 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt
2275ATOMIC_CMPXCHG_CPT(fixed1, mul_cpt, kmp_int8, 8, *,
2276 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt
2277ATOMIC_CMPXCHG_CPT(fixed1, orb_cpt, kmp_int8, 8, |,
2278 0) // __kmpc_atomic_fixed1_orb_cpt
2279ATOMIC_CMPXCHG_CPT(fixed1, shl_cpt, kmp_int8, 8, <<,
2280 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt
2281ATOMIC_CMPXCHG_CPT(fixed1, shr_cpt, kmp_int8, 8, >>,
2282 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt
2283ATOMIC_CMPXCHG_CPT(fixed1u, shr_cpt, kmp_uint8, 8, >>,
2284 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt
2285ATOMIC_CMPXCHG_CPT(fixed1, sub_cpt, kmp_int8, 8, -,
2286 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt
2287ATOMIC_CMPXCHG_CPT(fixed1, xor_cpt, kmp_int8, 8, ^,
2288 0) // __kmpc_atomic_fixed1_xor_cpt
2289ATOMIC_CMPXCHG_CPT(fixed2, add_cpt, kmp_int16, 16, +,
2290 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt
2291ATOMIC_CMPXCHG_CPT(fixed2, andb_cpt, kmp_int16, 16, &,
2292 0) // __kmpc_atomic_fixed2_andb_cpt
2293ATOMIC_CMPXCHG_CPT(fixed2, div_cpt, kmp_int16, 16, /,
2294 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt
2295ATOMIC_CMPXCHG_CPT(fixed2u, div_cpt, kmp_uint16, 16, /,
2296 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt
2297ATOMIC_CMPXCHG_CPT(fixed2, mul_cpt, kmp_int16, 16, *,
2298 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt
2299ATOMIC_CMPXCHG_CPT(fixed2, orb_cpt, kmp_int16, 16, |,
2300 0) // __kmpc_atomic_fixed2_orb_cpt
2301ATOMIC_CMPXCHG_CPT(fixed2, shl_cpt, kmp_int16, 16, <<,
2302 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt
2303ATOMIC_CMPXCHG_CPT(fixed2, shr_cpt, kmp_int16, 16, >>,
2304 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt
2305ATOMIC_CMPXCHG_CPT(fixed2u, shr_cpt, kmp_uint16, 16, >>,
2306 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt
2307ATOMIC_CMPXCHG_CPT(fixed2, sub_cpt, kmp_int16, 16, -,
2308 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt
2309ATOMIC_CMPXCHG_CPT(fixed2, xor_cpt, kmp_int16, 16, ^,
2310 0) // __kmpc_atomic_fixed2_xor_cpt
2311ATOMIC_CMPXCHG_CPT(fixed4, andb_cpt, kmp_int32, 32, &,
2312 0) // __kmpc_atomic_fixed4_andb_cpt
2313ATOMIC_CMPXCHG_CPT(fixed4, div_cpt, kmp_int32, 32, /,
2314 KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt
2315ATOMIC_CMPXCHG_CPT(fixed4u, div_cpt, kmp_uint32, 32, /,
2316 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt
2317ATOMIC_CMPXCHG_CPT(fixed4, mul_cpt, kmp_int32, 32, *,
2318 KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul_cpt
2319ATOMIC_CMPXCHG_CPT(fixed4, orb_cpt, kmp_int32, 32, |,
2320 0) // __kmpc_atomic_fixed4_orb_cpt
2321ATOMIC_CMPXCHG_CPT(fixed4, shl_cpt, kmp_int32, 32, <<,
2322 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt
2323ATOMIC_CMPXCHG_CPT(fixed4, shr_cpt, kmp_int32, 32, >>,
2324 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt
2325ATOMIC_CMPXCHG_CPT(fixed4u, shr_cpt, kmp_uint32, 32, >>,
2326 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt
2327ATOMIC_CMPXCHG_CPT(fixed4, xor_cpt, kmp_int32, 32, ^,
2328 0) // __kmpc_atomic_fixed4_xor_cpt
2329ATOMIC_CMPXCHG_CPT(fixed8, andb_cpt, kmp_int64, 64, &,
2330 KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb_cpt
2331ATOMIC_CMPXCHG_CPT(fixed8, div_cpt, kmp_int64, 64, /,
2332 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt
2333ATOMIC_CMPXCHG_CPT(fixed8u, div_cpt, kmp_uint64, 64, /,
2334 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt
2335ATOMIC_CMPXCHG_CPT(fixed8, mul_cpt, kmp_int64, 64, *,
2336 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt
2337ATOMIC_CMPXCHG_CPT(fixed8, orb_cpt, kmp_int64, 64, |,
2338 KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb_cpt
2339ATOMIC_CMPXCHG_CPT(fixed8, shl_cpt, kmp_int64, 64, <<,
2340 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt
2341ATOMIC_CMPXCHG_CPT(fixed8, shr_cpt, kmp_int64, 64, >>,
2342 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt
2343ATOMIC_CMPXCHG_CPT(fixed8u, shr_cpt, kmp_uint64, 64, >>,
2344 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt
2345ATOMIC_CMPXCHG_CPT(fixed8, xor_cpt, kmp_int64, 64, ^,
2346 KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor_cpt
2347ATOMIC_CMPXCHG_CPT(float4, div_cpt, kmp_real32, 32, /,
2348 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt
2349ATOMIC_CMPXCHG_CPT(float4, mul_cpt, kmp_real32, 32, *,
2350 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt
2351ATOMIC_CMPXCHG_CPT(float8, div_cpt, kmp_real64, 64, /,
2352 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt
2353ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *,
2354 KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt
2355// TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2356
2357// CAPTURE routines for mixed types RHS=float16
2358#if KMP_HAVE_QUAD
2359
2360// Beginning of a definition (provides name, parameters, debug trace)
2361// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
2362// fixed)
2363// OP_ID - operation identifier (add, sub, mul, ...)
2364// TYPE - operands' type
2365#define ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2366 TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
2367 ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag) { \
2368 KMP_DEBUG_ASSERT(__kmp_init_serial); \
2369 KA_TRACE(100, \
2370 ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
2371 gtid));
2372
2373// -------------------------------------------------------------------------
2374#define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
2375 RTYPE, LCK_ID, MASK, GOMP_FLAG) \
2376 ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2377 TYPE new_value; \
2378 OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) \
2379 OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2380 }
2381
2382// -------------------------------------------------------------------------
2383#define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
2384 LCK_ID, GOMP_FLAG) \
2385 ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2386 TYPE new_value; \
2387 OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) /* send assignment */ \
2388 OP_CRITICAL_CPT(OP## =, LCK_ID) /* send assignment */ \
2389 }
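// Note (sketch only): the _MIX variants differ from the plain capture
// entries only in the rhs type; the lhs type still selects the CAS width.
// The first instantiation below generates a routine of this shape, with
// "old_value OP rhs" evaluated in _Quad and converted back to char before
// the 8-bit compare-and-store:
#if 0
char __kmpc_atomic_fixed1_add_cpt_fp(ident_t *id_ref, int gtid, char *lhs,
                                     _Quad rhs, int flag);
#endif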
2390
2391ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0,
2392 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt_fp
2393ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0,
2394 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_cpt_fp
2395ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2396 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_fp
2397ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2398 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_fp
2399ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2400 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt_fp
2401ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2402 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_cpt_fp
2403ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0,
2404 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_fp
2405ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0,
2406 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_fp
2407
2408ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1,
2409 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt_fp
2410ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1,
2411 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_cpt_fp
2412ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2413 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_fp
2414ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2415 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_fp
2416ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2417 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt_fp
2418ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2419 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_cpt_fp
2420ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1,
2421 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_fp
2422ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1,
2423 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_fp
2424
2425ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2426 0) // __kmpc_atomic_fixed4_add_cpt_fp
2427ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2428 0) // __kmpc_atomic_fixed4u_add_cpt_fp
2429ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2430 0) // __kmpc_atomic_fixed4_sub_cpt_fp
2431ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2432 0) // __kmpc_atomic_fixed4u_sub_cpt_fp
2433ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2434 0) // __kmpc_atomic_fixed4_mul_cpt_fp
2435ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2436 0) // __kmpc_atomic_fixed4u_mul_cpt_fp
2437ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2438 0) // __kmpc_atomic_fixed4_div_cpt_fp
2439ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2440 0) // __kmpc_atomic_fixed4u_div_cpt_fp
2441
2442ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2443 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt_fp
2444ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2445 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_cpt_fp
2446ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2447 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_fp
2448ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2449 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_fp
2450ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2451 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt_fp
2452ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2453 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_cpt_fp
2454ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2455 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_fp
2456ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2457 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_fp
2458
2459ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3,
2460 KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt_fp
2461ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3,
2462 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_fp
2463ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3,
2464 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt_fp
2465ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3,
2466 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_fp
2467
2468ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7,
2469 KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt_fp
2470ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7,
2471 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_fp
2472ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7,
2473 KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt_fp
2474ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7,
2475 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_fp
2476
2477ATOMIC_CRITICAL_CPT_MIX(float10, long double, add_cpt, +, fp, _Quad, 10r,
2478 1) // __kmpc_atomic_float10_add_cpt_fp
2479ATOMIC_CRITICAL_CPT_MIX(float10, long double, sub_cpt, -, fp, _Quad, 10r,
2480 1) // __kmpc_atomic_float10_sub_cpt_fp
2481ATOMIC_CRITICAL_CPT_MIX(float10, long double, mul_cpt, *, fp, _Quad, 10r,
2482 1) // __kmpc_atomic_float10_mul_cpt_fp
2483ATOMIC_CRITICAL_CPT_MIX(float10, long double, div_cpt, /, fp, _Quad, 10r,
2484 1) // __kmpc_atomic_float10_div_cpt_fp
2485
2486#endif // KMP_HAVE_QUAD
2487
2488// ------------------------------------------------------------------------
2489// Routines for C/C++ Reduction operators && and ||
2490
2491// -------------------------------------------------------------------------
2492// Operation on *lhs, rhs bound by critical section
2493// OP - operator (it's supposed to contain an assignment)
2494// LCK_ID - lock identifier
2495// Note: don't check gtid as it should always be valid
2496// 1, 2-byte - expect valid parameter, other - check before this macro
2497#define OP_CRITICAL_L_CPT(OP, LCK_ID) \
2498 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2499 \
2500 if (flag) { \
2501 new_value OP rhs; \
2502 } else \
2503 new_value = (*lhs); \
2504 \
2505 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
2506
2507// ------------------------------------------------------------------------
2508#ifdef KMP_GOMP_COMPAT
2509#define OP_GOMP_CRITICAL_L_CPT(OP, FLAG) \
2510 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2511 KMP_CHECK_GTID; \
2512 OP_CRITICAL_L_CPT(OP, 0); \
2513 return new_value; \
2514 }
2515#else
2516#define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)
2517#endif /* KMP_GOMP_COMPAT */
2518
2519// ------------------------------------------------------------------------
2520// Need separate macros for &&, || because there is no combined assignment
2521#define ATOMIC_CMPX_L_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2522 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2523 TYPE new_value; \
2524 OP_GOMP_CRITICAL_L_CPT(= *lhs OP, GOMP_FLAG) \
2525 OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2526 }
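// Note (sketch only): C has no "&&=" or "||=", so the whole right-hand side
// "= *lhs OP" is passed as the OP argument of the GOMP critical path; inside
// OP_CRITICAL_L_CPT the statement "new_value OP rhs;" then expands to a
// plain assignment. For fixed4 andl_cpt:
#if 0
new_value = *lhs && rhs; // "new_value OP rhs" with OP spliced as "= *lhs &&"
#endif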
2527
2528ATOMIC_CMPX_L_CPT(fixed1, andl_cpt, char, 8, &&,
2529 KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl_cpt
2530ATOMIC_CMPX_L_CPT(fixed1, orl_cpt, char, 8, ||,
2531 KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl_cpt
2532ATOMIC_CMPX_L_CPT(fixed2, andl_cpt, short, 16, &&,
2533 KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl_cpt
2534ATOMIC_CMPX_L_CPT(fixed2, orl_cpt, short, 16, ||,
2535 KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl_cpt
2536ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&,
2537 0) // __kmpc_atomic_fixed4_andl_cpt
2538ATOMIC_CMPX_L_CPT(fixed4, orl_cpt, kmp_int32, 32, ||,
2539 0) // __kmpc_atomic_fixed4_orl_cpt
2540ATOMIC_CMPX_L_CPT(fixed8, andl_cpt, kmp_int64, 64, &&,
2541 KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl_cpt
2542ATOMIC_CMPX_L_CPT(fixed8, orl_cpt, kmp_int64, 64, ||,
2543 KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl_cpt
2544
2545// -------------------------------------------------------------------------
2546// Routines for Fortran operators that have no C counterpart:
2547// MAX, MIN, .EQV., .NEQV.
2548// Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
2549// Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt
2550
2551// -------------------------------------------------------------------------
2552// MIN and MAX need separate macros
2553// OP - operator used to check whether any action is needed
2554#define MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
2555 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2556 \
2557 if (*lhs OP rhs) { /* still need actions? */ \
2558 old_value = *lhs; \
2559 *lhs = rhs; \
2560 if (flag) \
2561 new_value = rhs; \
2562 else \
2563 new_value = old_value; \
2564 } else { \
2565 new_value = *lhs; \
2566 } \
2567 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2568 return new_value;
2569
2570// -------------------------------------------------------------------------
2571#ifdef KMP_GOMP_COMPAT
2572#define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG) \
2573 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2574 KMP_CHECK_GTID; \
2575 MIN_MAX_CRITSECT_CPT(OP, 0); \
2576 }
2577#else
2578#define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)
2579#endif /* KMP_GOMP_COMPAT */
2580
2581// -------------------------------------------------------------------------
2582#define MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
2583 { \
2584 TYPE KMP_ATOMIC_VOLATILE temp_val; \
2585 /*TYPE old_value; */ \
2586 temp_val = *lhs; \
2587 old_value = temp_val; \
2588 while (old_value OP rhs && /* still need actions? */ \
2589 !KMP_COMPARE_AND_STORE_ACQ##BITS( \
2590 (kmp_int##BITS *)lhs, \
2591 *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2592 *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
2593 KMP_CPU_PAUSE(); \
2594 temp_val = *lhs; \
2595 old_value = temp_val; \
2596 } \
2597 if (flag) \
2598 return rhs; \
2599 else \
2600 return old_value; \
2601 }
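// Illustration (sketch only): the loop above installs rhs only while the
// current value still "needs action"; for max_cpt OP is "<", so the CAS is
// attempted only while *lhs < rhs, and a concurrent writer that installs a
// value >= rhs ends the loop without a store. Rough expansion for
// TYPE=kmp_int32, BITS=32, OP=<:
#if 0
kmp_int32 old_value = *lhs;
while (old_value < rhs && // still smaller: try to install the new maximum
       !KMP_COMPARE_AND_STORE_ACQ32((kmp_int32 *)lhs, old_value, rhs)) {
  KMP_CPU_PAUSE();
  old_value = *lhs;
}
return flag ? rhs : old_value;
#endif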
2602
2603// -------------------------------------------------------------------------
2604// 1-byte, 2-byte operands - use critical section
2605#define MIN_MAX_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2606 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2607 TYPE new_value, old_value; \
2608 if (*lhs OP rhs) { /* need actions? */ \
2609 GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
2610 MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
2611 } \
2612 return *lhs; \
2613 }
2614
2615#define MIN_MAX_COMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2616 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2617 TYPE new_value, old_value; \
2618 if (*lhs OP rhs) { \
2619 GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
2620 MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
2621 } \
2622 return *lhs; \
2623 }
2624
2625MIN_MAX_COMPXCHG_CPT(fixed1, max_cpt, char, 8, <,
2626 KMP_ARCH_X86) // __kmpc_atomic_fixed1_max_cpt
2627MIN_MAX_COMPXCHG_CPT(fixed1, min_cpt, char, 8, >,
2628 KMP_ARCH_X86) // __kmpc_atomic_fixed1_min_cpt
2629MIN_MAX_COMPXCHG_CPT(fixed2, max_cpt, short, 16, <,
2630 KMP_ARCH_X86) // __kmpc_atomic_fixed2_max_cpt
2631MIN_MAX_COMPXCHG_CPT(fixed2, min_cpt, short, 16, >,
2632 KMP_ARCH_X86) // __kmpc_atomic_fixed2_min_cpt
2633MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <,
2634 0) // __kmpc_atomic_fixed4_max_cpt
2635MIN_MAX_COMPXCHG_CPT(fixed4, min_cpt, kmp_int32, 32, >,
2636 0) // __kmpc_atomic_fixed4_min_cpt
2637MIN_MAX_COMPXCHG_CPT(fixed8, max_cpt, kmp_int64, 64, <,
2638 KMP_ARCH_X86) // __kmpc_atomic_fixed8_max_cpt
2639MIN_MAX_COMPXCHG_CPT(fixed8, min_cpt, kmp_int64, 64, >,
2640 KMP_ARCH_X86) // __kmpc_atomic_fixed8_min_cpt
2641MIN_MAX_COMPXCHG_CPT(float4, max_cpt, kmp_real32, 32, <,
2642 KMP_ARCH_X86) // __kmpc_atomic_float4_max_cpt
2643MIN_MAX_COMPXCHG_CPT(float4, min_cpt, kmp_real32, 32, >,
2644 KMP_ARCH_X86) // __kmpc_atomic_float4_min_cpt
2645MIN_MAX_COMPXCHG_CPT(float8, max_cpt, kmp_real64, 64, <,
2646 KMP_ARCH_X86) // __kmpc_atomic_float8_max_cpt
2647MIN_MAX_COMPXCHG_CPT(float8, min_cpt, kmp_real64, 64, >,
2648 KMP_ARCH_X86) // __kmpc_atomic_float8_min_cpt
2649#if KMP_HAVE_QUAD
2650MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r,
2651 1) // __kmpc_atomic_float16_max_cpt
2652MIN_MAX_CRITICAL_CPT(float16, min_cpt, QUAD_LEGACY, >, 16r,
2653 1) // __kmpc_atomic_float16_min_cpt
2654#if (KMP_ARCH_X86)
2655MIN_MAX_CRITICAL_CPT(float16, max_a16_cpt, Quad_a16_t, <, 16r,
2656 1) // __kmpc_atomic_float16_max_a16_cpt
2657MIN_MAX_CRITICAL_CPT(float16, min_a16_cpt, Quad_a16_t, >, 16r,
2658 1) // __kmpc_atomic_float16_min_a16_cpt
2659#endif // (KMP_ARCH_X86)
2660#endif // KMP_HAVE_QUAD
2661
2662// ------------------------------------------------------------------------
2663#ifdef KMP_GOMP_COMPAT
2664#define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG) \
2665 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2666 KMP_CHECK_GTID; \
2667 OP_CRITICAL_CPT(OP, 0); \
2668 }
2669#else
2670#define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)
2671#endif /* KMP_GOMP_COMPAT */
2672// ------------------------------------------------------------------------
2673#define ATOMIC_CMPX_EQV_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2674 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2675 TYPE new_value; \
2676 OP_GOMP_CRITICAL_EQV_CPT(^= ~, GOMP_FLAG) /* send assignment */ \
2677 OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2678 }
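// Note (sketch only): .EQV. is bitwise equivalence (XNOR), which has no
// compound-assignment form either; the macro reuses the xor CAS path with
// OP spliced as "^ ~", so new_value = old_value ^ ~rhs. An 8-bit worked
// example (the cast matters because ~ promotes to int):
#if 0
unsigned char x = 0x0C; // 0b00001100
unsigned char r = 0x0A; // 0b00001010
unsigned char eqv = x ^ (unsigned char)~r; // 0xF9: set where x and r agree
#endif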
2679
2680// ------------------------------------------------------------------------
2681
2682ATOMIC_CMPXCHG_CPT(fixed1, neqv_cpt, kmp_int8, 8, ^,
2683 KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv_cpt
2684ATOMIC_CMPXCHG_CPT(fixed2, neqv_cpt, kmp_int16, 16, ^,
2685 KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv_cpt
2686ATOMIC_CMPXCHG_CPT(fixed4, neqv_cpt, kmp_int32, 32, ^,
2687 KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv_cpt
2688ATOMIC_CMPXCHG_CPT(fixed8, neqv_cpt, kmp_int64, 64, ^,
2689 KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv_cpt
2690ATOMIC_CMPX_EQV_CPT(fixed1, eqv_cpt, kmp_int8, 8, ^~,
2691 KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv_cpt
2692ATOMIC_CMPX_EQV_CPT(fixed2, eqv_cpt, kmp_int16, 16, ^~,
2693 KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv_cpt
2694ATOMIC_CMPX_EQV_CPT(fixed4, eqv_cpt, kmp_int32, 32, ^~,
2695 KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv_cpt
2696ATOMIC_CMPX_EQV_CPT(fixed8, eqv_cpt, kmp_int64, 64, ^~,
2697 KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv_cpt
2698
2699// ------------------------------------------------------------------------
2700// Routines for Extended types: long double, _Quad, complex flavours (use
2701// critical section)
2702// TYPE_ID, OP_ID, TYPE - detailed above
2703// OP - operator
2704// LCK_ID - lock identifier, used to possibly distinguish lock variable
2705#define ATOMIC_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2706 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2707 TYPE new_value; \
2708 OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) /* send assignment */ \
2709 OP_CRITICAL_CPT(OP## =, LCK_ID) /* send assignment */ \
2710 }
2711
2712// ------------------------------------------------------------------------
2713// Workaround for cmplx4. Regular routines with return value don't work
2714// on Win_32e. Let's return captured values through the additional parameter.
2715#define OP_CRITICAL_CPT_WRK(OP, LCK_ID) \
2716 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2717 \
2718 if (flag) { \
2719 (*lhs) OP rhs; \
2720 (*out) = (*lhs); \
2721 } else { \
2722 (*out) = (*lhs); \
2723 (*lhs) OP rhs; \
2724 } \
2725 \
2726 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2727 return;
2728// ------------------------------------------------------------------------
2729
2730#ifdef KMP_GOMP_COMPAT
2731#define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG) \
2732 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2733 KMP_CHECK_GTID; \
2734 OP_CRITICAL_CPT_WRK(OP## =, 0); \
2735 }
2736#else
2737#define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)
2738#endif /* KMP_GOMP_COMPAT */
2739// ------------------------------------------------------------------------
2740
2741#define ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
2742 void __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, TYPE *lhs, \
2743 TYPE rhs, TYPE *out, int flag) { \
2744 KMP_DEBUG_ASSERT(__kmp_init_serial); \
2745 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2746// ------------------------------------------------------------------------
2747
2748#define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2749 ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
2750 OP_GOMP_CRITICAL_CPT_WRK(OP, GOMP_FLAG) \
2751 OP_CRITICAL_CPT_WRK(OP## =, LCK_ID) \
2752 }
2753// The end of workaround for cmplx4
2754
2755/* ------------------------------------------------------------------------- */
2756// routines for long double type
2757ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r,
2758 1) // __kmpc_atomic_float10_add_cpt
2759ATOMIC_CRITICAL_CPT(float10, sub_cpt, long double, -, 10r,
2760 1) // __kmpc_atomic_float10_sub_cpt
2761ATOMIC_CRITICAL_CPT(float10, mul_cpt, long double, *, 10r,
2762 1) // __kmpc_atomic_float10_mul_cpt
2763ATOMIC_CRITICAL_CPT(float10, div_cpt, long double, /, 10r,
2764 1) // __kmpc_atomic_float10_div_cpt
2765#if KMP_HAVE_QUAD
2766// routines for _Quad type
2767ATOMIC_CRITICAL_CPT(float16, add_cpt, QUAD_LEGACY, +, 16r,
2768 1) // __kmpc_atomic_float16_add_cpt
2769ATOMIC_CRITICAL_CPT(float16, sub_cpt, QUAD_LEGACY, -, 16r,
2770 1) // __kmpc_atomic_float16_sub_cpt
2771ATOMIC_CRITICAL_CPT(float16, mul_cpt, QUAD_LEGACY, *, 16r,
2772 1) // __kmpc_atomic_float16_mul_cpt
2773ATOMIC_CRITICAL_CPT(float16, div_cpt, QUAD_LEGACY, /, 16r,
2774 1) // __kmpc_atomic_float16_div_cpt
2775#if (KMP_ARCH_X86)
2776ATOMIC_CRITICAL_CPT(float16, add_a16_cpt, Quad_a16_t, +, 16r,
2777 1) // __kmpc_atomic_float16_add_a16_cpt
2778ATOMIC_CRITICAL_CPT(float16, sub_a16_cpt, Quad_a16_t, -, 16r,
2779 1) // __kmpc_atomic_float16_sub_a16_cpt
2780ATOMIC_CRITICAL_CPT(float16, mul_a16_cpt, Quad_a16_t, *, 16r,
2781 1) // __kmpc_atomic_float16_mul_a16_cpt
2782ATOMIC_CRITICAL_CPT(float16, div_a16_cpt, Quad_a16_t, /, 16r,
2783 1) // __kmpc_atomic_float16_div_a16_cpt
2784#endif // (KMP_ARCH_X86)
2785#endif // KMP_HAVE_QUAD
2786
2787// routines for complex types
2788
2789// cmplx4 routines to return void
2790ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c,
2791 1) // __kmpc_atomic_cmplx4_add_cpt
2792ATOMIC_CRITICAL_CPT_WRK(cmplx4, sub_cpt, kmp_cmplx32, -, 8c,
2793 1) // __kmpc_atomic_cmplx4_sub_cpt
2794ATOMIC_CRITICAL_CPT_WRK(cmplx4, mul_cpt, kmp_cmplx32, *, 8c,
2795 1) // __kmpc_atomic_cmplx4_mul_cpt
2796ATOMIC_CRITICAL_CPT_WRK(cmplx4, div_cpt, kmp_cmplx32, /, 8c,
2797 1) // __kmpc_atomic_cmplx4_div_cpt
2798
2799ATOMIC_CRITICAL_CPT(cmplx8, add_cpt, kmp_cmplx64, +, 16c,
2800 1) // __kmpc_atomic_cmplx8_add_cpt
2801ATOMIC_CRITICAL_CPT(cmplx8, sub_cpt, kmp_cmplx64, -, 16c,
2802 1) // __kmpc_atomic_cmplx8_sub_cpt
2803ATOMIC_CRITICAL_CPT(cmplx8, mul_cpt, kmp_cmplx64, *, 16c,
2804 1) // __kmpc_atomic_cmplx8_mul_cpt
2805ATOMIC_CRITICAL_CPT(cmplx8, div_cpt, kmp_cmplx64, /, 16c,
2806 1) // __kmpc_atomic_cmplx8_div_cpt
2807ATOMIC_CRITICAL_CPT(cmplx10, add_cpt, kmp_cmplx80, +, 20c,
2808 1) // __kmpc_atomic_cmplx10_add_cpt
2809ATOMIC_CRITICAL_CPT(cmplx10, sub_cpt, kmp_cmplx80, -, 20c,
2810 1) // __kmpc_atomic_cmplx10_sub_cpt
2811ATOMIC_CRITICAL_CPT(cmplx10, mul_cpt, kmp_cmplx80, *, 20c,
2812 1) // __kmpc_atomic_cmplx10_mul_cpt
2813ATOMIC_CRITICAL_CPT(cmplx10, div_cpt, kmp_cmplx80, /, 20c,
2814 1) // __kmpc_atomic_cmplx10_div_cpt
2815#if KMP_HAVE_QUAD
2816ATOMIC_CRITICAL_CPT(cmplx16, add_cpt, CPLX128_LEG, +, 32c,
2817 1) // __kmpc_atomic_cmplx16_add_cpt
2818ATOMIC_CRITICAL_CPT(cmplx16, sub_cpt, CPLX128_LEG, -, 32c,
2819 1) // __kmpc_atomic_cmplx16_sub_cpt
2820ATOMIC_CRITICAL_CPT(cmplx16, mul_cpt, CPLX128_LEG, *, 32c,
2821 1) // __kmpc_atomic_cmplx16_mul_cpt
2822ATOMIC_CRITICAL_CPT(cmplx16, div_cpt, CPLX128_LEG, /, 32c,
2823 1) // __kmpc_atomic_cmplx16_div_cpt
2824#if (KMP_ARCH_X86)
2825ATOMIC_CRITICAL_CPT(cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c,
2826 1) // __kmpc_atomic_cmplx16_add_a16_cpt
2827ATOMIC_CRITICAL_CPT(cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c,
2828 1) // __kmpc_atomic_cmplx16_sub_a16_cpt
2829ATOMIC_CRITICAL_CPT(cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c,
2830 1) // __kmpc_atomic_cmplx16_mul_a16_cpt
2831ATOMIC_CRITICAL_CPT(cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c,
2832 1) // __kmpc_atomic_cmplx16_div_a16_cpt
2833#endif // (KMP_ARCH_X86)
2834#endif // KMP_HAVE_QUAD
2835
2836// OpenMP 4.0 capture forms: v = x = expr binop x; { v = x; x = expr binop
2837// x; } and { x = expr binop x; v = x; } for non-commutative operations.
2838// Supported only on IA-32 architecture and Intel(R) 64
2839
2840// -------------------------------------------------------------------------
2841// Operation on *lhs, rhs bound by critical section
2842// OP - operator (it's supposed to contain an assignment)
2843// LCK_ID - lock identifier
2844// Note: don't check gtid as it should always be valid
2845// 1, 2-byte - expect valid parameter, other - check before this macro
2846#define OP_CRITICAL_CPT_REV(OP, LCK_ID) \
2847 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2848 \
2849 if (flag) { \
2850 /*temp_val = (*lhs);*/ \
2851 (*lhs) = (rhs)OP(*lhs); \
2852 new_value = (*lhs); \
2853 } else { \
2854 new_value = (*lhs); \
2855 (*lhs) = (rhs)OP(*lhs); \
2856 } \
2857 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2858 return new_value;
2859
2860// ------------------------------------------------------------------------
2861#ifdef KMP_GOMP_COMPAT
2862#define OP_GOMP_CRITICAL_CPT_REV(OP, FLAG) \
2863 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2864 KMP_CHECK_GTID; \
2865 OP_CRITICAL_CPT_REV(OP, 0); \
2866 }
2867#else
2868#define OP_GOMP_CRITICAL_CPT_REV(OP, FLAG)
2869#endif /* KMP_GOMP_COMPAT */
2870
2871// ------------------------------------------------------------------------
2872// Operation on *lhs, rhs using "compare_and_store" routine
2873// TYPE - operands' type
2874// BITS - size in bits, used to distinguish low level calls
2875// OP - operator
2876// Note: temp_val introduced in order to force the compiler to read
2877// *lhs only once (w/o it the compiler reads *lhs twice)
2878#define OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
2879 { \
2880 TYPE KMP_ATOMIC_VOLATILE temp_val; \
2881 TYPE old_value, new_value; \
2882 temp_val = *lhs; \
2883 old_value = temp_val; \
2884 new_value = rhs OP old_value; \
2885 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2886 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2887 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2888 KMP_CPU_PAUSE(); \
2889 \
2890 temp_val = *lhs; \
2891 old_value = temp_val; \
2892 new_value = rhs OP old_value; \
2893 } \
2894 if (flag) { \
2895 return new_value; \
2896 } else \
2897 return old_value; \
2898 }
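// Illustration (sketch only): the _rev forms compute "rhs OP old_value" with
// the operands swapped, which matters for the non-commutative -, /, << and
// >>. Rough expansion for TYPE=kmp_real64, BITS=64, OP=/ (the body of
// __kmpc_atomic_float8_div_cpt_rev below, i.e. *lhs = rhs / *lhs):
#if 0
kmp_real64 old_value = *lhs;
kmp_real64 new_value = rhs / old_value; // reversed operand order
while (!KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)lhs,
                                    *(kmp_int64 *)&old_value,
                                    *(kmp_int64 *)&new_value)) {
  KMP_CPU_PAUSE();
  old_value = *lhs;
  new_value = rhs / old_value;
}
return flag ? new_value : old_value;
#endif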
2899
2900// -------------------------------------------------------------------------
2901#define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2902 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2903 TYPE new_value; \
2904 OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) \
2905 OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
2906 }
2907
2908ATOMIC_CMPXCHG_CPT_REV(fixed1, div_cpt_rev, kmp_int8, 8, /,
2909 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev
2910ATOMIC_CMPXCHG_CPT_REV(fixed1u, div_cpt_rev, kmp_uint8, 8, /,
2911 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev
2912ATOMIC_CMPXCHG_CPT_REV(fixed1, shl_cpt_rev, kmp_int8, 8, <<,
2913 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt_rev
2914ATOMIC_CMPXCHG_CPT_REV(fixed1, shr_cpt_rev, kmp_int8, 8, >>,
2915 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt_rev
2916ATOMIC_CMPXCHG_CPT_REV(fixed1u, shr_cpt_rev, kmp_uint8, 8, >>,
2917 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt_rev
2918ATOMIC_CMPXCHG_CPT_REV(fixed1, sub_cpt_rev, kmp_int8, 8, -,
2919 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev
2920ATOMIC_CMPXCHG_CPT_REV(fixed2, div_cpt_rev, kmp_int16, 16, /,
2921 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev
2922ATOMIC_CMPXCHG_CPT_REV(fixed2u, div_cpt_rev, kmp_uint16, 16, /,
2923 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev
2924ATOMIC_CMPXCHG_CPT_REV(fixed2, shl_cpt_rev, kmp_int16, 16, <<,
2925 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt_rev
2926ATOMIC_CMPXCHG_CPT_REV(fixed2, shr_cpt_rev, kmp_int16, 16, >>,
2927 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt_rev
2928ATOMIC_CMPXCHG_CPT_REV(fixed2u, shr_cpt_rev, kmp_uint16, 16, >>,
2929 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt_rev
2930ATOMIC_CMPXCHG_CPT_REV(fixed2, sub_cpt_rev, kmp_int16, 16, -,
2931 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev
2932ATOMIC_CMPXCHG_CPT_REV(fixed4, div_cpt_rev, kmp_int32, 32, /,
2933 KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt_rev
2934ATOMIC_CMPXCHG_CPT_REV(fixed4u, div_cpt_rev, kmp_uint32, 32, /,
2935 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt_rev
2936ATOMIC_CMPXCHG_CPT_REV(fixed4, shl_cpt_rev, kmp_int32, 32, <<,
2937 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt_rev
2938ATOMIC_CMPXCHG_CPT_REV(fixed4, shr_cpt_rev, kmp_int32, 32, >>,
2939 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt_rev
2940ATOMIC_CMPXCHG_CPT_REV(fixed4u, shr_cpt_rev, kmp_uint32, 32, >>,
2941 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt_rev
2942ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, kmp_int32, 32, -,
2943 KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_cpt_rev
2944ATOMIC_CMPXCHG_CPT_REV(fixed8, div_cpt_rev, kmp_int64, 64, /,
2945 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev
2946ATOMIC_CMPXCHG_CPT_REV(fixed8u, div_cpt_rev, kmp_uint64, 64, /,
2947 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev
2948ATOMIC_CMPXCHG_CPT_REV(fixed8, shl_cpt_rev, kmp_int64, 64, <<,
2949 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt_rev
2950ATOMIC_CMPXCHG_CPT_REV(fixed8, shr_cpt_rev, kmp_int64, 64, >>,
2951 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt_rev
2952ATOMIC_CMPXCHG_CPT_REV(fixed8u, shr_cpt_rev, kmp_uint64, 64, >>,
2953 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt_rev
2954ATOMIC_CMPXCHG_CPT_REV(fixed8, sub_cpt_rev, kmp_int64, 64, -,
2955 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev
2956ATOMIC_CMPXCHG_CPT_REV(float4, div_cpt_rev, kmp_real32, 32, /,
2957 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev
2958ATOMIC_CMPXCHG_CPT_REV(float4, sub_cpt_rev, kmp_real32, 32, -,
2959 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev
2960ATOMIC_CMPXCHG_CPT_REV(float8, div_cpt_rev, kmp_real64, 64, /,
2961 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev
2962ATOMIC_CMPXCHG_CPT_REV(float8, sub_cpt_rev, kmp_real64, 64, -,
2963 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev
2964// TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2965
2966// ------------------------------------------------------------------------
2967// Routines for Extended types: long double, _Quad, complex flavours (use
2968// critical section)
2969// TYPE_ID, OP_ID, TYPE - detailed above
2970// OP - operator
2971// LCK_ID - lock identifier, used to possibly distinguish lock variable
2972#define ATOMIC_CRITICAL_CPT_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2973 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2974 TYPE new_value; \
2975 /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/ \
2976 OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) \
2977 OP_CRITICAL_CPT_REV(OP, LCK_ID) \
2978 }
2979
2980/* ------------------------------------------------------------------------- */
2981// routines for long double type
2982ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r,
2983 1) // __kmpc_atomic_float10_sub_cpt_rev
2984ATOMIC_CRITICAL_CPT_REV(float10, div_cpt_rev, long double, /, 10r,
2985 1) // __kmpc_atomic_float10_div_cpt_rev
2986#if KMP_HAVE_QUAD
2987// routines for _Quad type
2988ATOMIC_CRITICAL_CPT_REV(float16, sub_cpt_rev, QUAD_LEGACY, -, 16r,
2989 1) // __kmpc_atomic_float16_sub_cpt_rev
2990ATOMIC_CRITICAL_CPT_REV(float16, div_cpt_rev, QUAD_LEGACY, /, 16r,
2991 1) // __kmpc_atomic_float16_div_cpt_rev
2992#if (KMP_ARCH_X86)
2993ATOMIC_CRITICAL_CPT_REV(float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r,
2994 1) // __kmpc_atomic_float16_sub_a16_cpt_rev
2995ATOMIC_CRITICAL_CPT_REV(float16, div_a16_cpt_rev, Quad_a16_t, /, 16r,
2996 1) // __kmpc_atomic_float16_div_a16_cpt_rev
2997#endif // (KMP_ARCH_X86)
2998#endif // KMP_HAVE_QUAD
2999
3000// routines for complex types
3001
3002// ------------------------------------------------------------------------
3003// Workaround for cmplx4. Regular routines with return value don't work
3004// on Win_32e. Let's return captured values through the additional parameter.
3005#define OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
3006 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3007 \
3008 if (flag) { \
3009 (*lhs) = (rhs)OP(*lhs); \
3010 (*out) = (*lhs); \
3011 } else { \
3012 (*out) = (*lhs); \
3013 (*lhs) = (rhs)OP(*lhs); \
3014 } \
3015 \
3016 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3017 return;
3018// ------------------------------------------------------------------------
3019
3020#ifdef KMP_GOMP_COMPAT
3021#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG) \
3022 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
3023 KMP_CHECK_GTID; \
3024 OP_CRITICAL_CPT_REV_WRK(OP, 0); \
3025 }
3026#else
3027#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)
3028#endif /* KMP_GOMP_COMPAT */
3029// ------------------------------------------------------------------------
3030
3031#define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, \
3032 GOMP_FLAG) \
3033 ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
3034 OP_GOMP_CRITICAL_CPT_REV_WRK(OP, GOMP_FLAG) \
3035 OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
3036 }
3037// The end of workaround for cmplx4

// !!! TODO: check if we need to return void for cmplx4 routines
// cmplx4 routines to return void
ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c,
                            1) // __kmpc_atomic_cmplx4_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c,
                            1) // __kmpc_atomic_cmplx4_div_cpt_rev

ATOMIC_CRITICAL_CPT_REV(cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c,
                        1) // __kmpc_atomic_cmplx8_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c,
                        1) // __kmpc_atomic_cmplx8_div_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c,
                        1) // __kmpc_atomic_cmplx10_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c,
                        1) // __kmpc_atomic_cmplx10_div_cpt_rev
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c,
                        1) // __kmpc_atomic_cmplx16_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c,
                        1) // __kmpc_atomic_cmplx16_div_cpt_rev
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c,
                        1) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,
                        1) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// Capture reverse for mixed type: RHS=float16
#if KMP_HAVE_QUAD

// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands' type and size (fixed*, fixed*u for signed, unsigned
// fixed)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
                                   RTYPE, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) \
  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
  }
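// Illustrative sketch only (not compiled): assuming ATOMIC_BEGIN_CPT_MIX
// appends the RHS type id to the routine name, the fixed1/_Quad
// instantiations below get signatures of the form
//
//   char __kmpc_atomic_fixed1_sub_cpt_rev_fp(ident_t *id_ref, int gtid,
//                                            char *lhs, _Quad rhs, int flag);
//
// i.e. the LHS keeps its narrow type while the RHS arrives as _Quad;
// OP_CMPXCHG_CPT_REV then retries a BITS-wide compare-and-store until the
// reversed update "rhs OP *lhs" (converted back to TYPE) lands.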

// -------------------------------------------------------------------------
#define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
                                    LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL_CPT_REV(OP, LCK_ID) /* send assignment */ \
  }

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i,
                           1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i,
                           1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i,
                           3, 0) // __kmpc_atomic_fixed4_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad,
                           4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i,
                           3, 0) // __kmpc_atomic_fixed4_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad,
                           4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i,
                           7,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad,
                           8i, 7,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i,
                           7,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad,
                           8i, 7,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad,
                           4r, 3,
                           KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad,
                           4r, 3,
                           KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad,
                           8r, 7,
                           KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad,
                           8r, 7,
                           KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev_fp

ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, sub_cpt_rev, -, fp, _Quad,
                            10r, 1) // __kmpc_atomic_float10_sub_cpt_rev_fp
ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, div_cpt_rev, /, fp, _Quad,
                            10r, 1) // __kmpc_atomic_float10_div_cpt_rev_fp

#endif // KMP_HAVE_QUAD

// OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}

#define ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
  TYPE __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
                                     TYPE rhs) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));

#define CRITICAL_SWP(LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
 \
  old_value = (*lhs); \
  (*lhs) = rhs; \
 \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return old_value;

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define GOMP_CRITICAL_SWP(FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    CRITICAL_SWP(0); \
  }
#else
#define GOMP_CRITICAL_SWP(FLAG)
#endif /* KMP_GOMP_COMPAT */

#define ATOMIC_XCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
  TYPE old_value; \
  GOMP_CRITICAL_SWP(GOMP_FLAG) \
  old_value = KMP_XCHG_FIXED##BITS(lhs, rhs); \
  return old_value; \
  }
// ------------------------------------------------------------------------
#define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
  TYPE old_value; \
  GOMP_CRITICAL_SWP(GOMP_FLAG) \
  old_value = KMP_XCHG_REAL##BITS(lhs, rhs); \
  return old_value; \
  }

// ------------------------------------------------------------------------
#define CMPXCHG_SWP(TYPE, BITS) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    TYPE old_value, new_value; \
    temp_val = *lhs; \
    old_value = temp_val; \
    new_value = rhs; \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
      KMP_CPU_PAUSE(); \
 \
      temp_val = *lhs; \
      old_value = temp_val; \
      new_value = rhs; \
    } \
    return old_value; \
  }
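// For readers unfamiliar with the retry idiom above, the same swap written
// against std::atomic looks like this (illustrative sketch only; the runtime
// deliberately uses its own KMP_* primitives instead):
//
//   #include <atomic>
//   kmp_int64 swap_sketch(std::atomic<kmp_int64> *lhs, kmp_int64 rhs) {
//     kmp_int64 old_value = lhs->load();
//     // compare_exchange_weak refreshes old_value on failure, so the loop
//     // body is empty; CMPXCHG_SWP re-reads *lhs explicitly instead.
//     while (!lhs->compare_exchange_weak(old_value, rhs)) {
//     }
//     return old_value;
//   }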

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
  TYPE old_value; \
  GOMP_CRITICAL_SWP(GOMP_FLAG) \
  CMPXCHG_SWP(TYPE, BITS) \
  }

ATOMIC_XCHG_SWP(fixed1, kmp_int8, 8, KMP_ARCH_X86) // __kmpc_atomic_fixed1_swp
ATOMIC_XCHG_SWP(fixed2, kmp_int16, 16, KMP_ARCH_X86) // __kmpc_atomic_fixed2_swp
ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) // __kmpc_atomic_fixed4_swp

ATOMIC_XCHG_FLOAT_SWP(float4, kmp_real32, 32,
                      KMP_ARCH_X86) // __kmpc_atomic_float4_swp

#if (KMP_ARCH_X86)
ATOMIC_CMPXCHG_SWP(fixed8, kmp_int64, 64,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
ATOMIC_CMPXCHG_SWP(float8, kmp_real64, 64,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_swp
#else
ATOMIC_XCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
ATOMIC_XCHG_FLOAT_SWP(float8, kmp_real64, 64,
                      KMP_ARCH_X86) // __kmpc_atomic_float8_swp
#endif // (KMP_ARCH_X86)
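// Note on the split above: IA-32 has no 8-byte atomic exchange instruction
// (XCHG handles at most 4 bytes there), but CMPXCHG8B does provide an 8-byte
// compare-and-store, so 32-bit x86 routes the 8-byte swaps through the
// CMPXCHG_SWP retry loop; other targets can use a plain atomic exchange.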

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
#define ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
  TYPE old_value; \
  GOMP_CRITICAL_SWP(GOMP_FLAG) \
  CRITICAL_SWP(LCK_ID) \
  }

// ------------------------------------------------------------------------
// !!! TODO: check if we need to return void for cmplx4 routines
// Workaround for cmplx4. Regular routines with a return value don't work
// on Win_32e, so the captured value is returned through an extra parameter.

#define ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
  void __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
                                     TYPE rhs, TYPE *out) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));

#define CRITICAL_SWP_WRK(LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
 \
  tmp = (*lhs); \
  (*lhs) = (rhs); \
  (*out) = tmp; \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return;
// ------------------------------------------------------------------------

#ifdef KMP_GOMP_COMPAT
#define GOMP_CRITICAL_SWP_WRK(FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    CRITICAL_SWP_WRK(0); \
  }
#else
#define GOMP_CRITICAL_SWP_WRK(FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------

#define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
  TYPE tmp; \
  GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \
  CRITICAL_SWP_WRK(LCK_ID) \
  }
// The end of workaround for cmplx4

ATOMIC_CRITICAL_SWP(float10, long double, 10r, 1) // __kmpc_atomic_float10_swp
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_SWP(float16, QUAD_LEGACY, 16r, 1) // __kmpc_atomic_float16_swp
#endif // KMP_HAVE_QUAD
// cmplx4 routine to return void
ATOMIC_CRITICAL_SWP_WRK(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp

// ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) //
// __kmpc_atomic_cmplx4_swp

ATOMIC_CRITICAL_SWP(cmplx8, kmp_cmplx64, 16c, 1) // __kmpc_atomic_cmplx8_swp
ATOMIC_CRITICAL_SWP(cmplx10, kmp_cmplx80, 20c, 1) // __kmpc_atomic_cmplx10_swp
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_SWP(cmplx16, CPLX128_LEG, 32c, 1) // __kmpc_atomic_cmplx16_swp
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_SWP(float16_a16, Quad_a16_t, 16r,
                    1) // __kmpc_atomic_float16_a16_swp
ATOMIC_CRITICAL_SWP(cmplx16_a16, kmp_cmplx128_a16_t, 32c,
                    1) // __kmpc_atomic_cmplx16_a16_swp
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// End of OpenMP 4.0 Capture

#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64

#undef OP_CRITICAL

/* ------------------------------------------------------------------------ */
/* Generic atomic routines */
void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (
#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
      FALSE /* must use lock */
#else
      TRUE
#endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
      ) {
    kmp_int8 old_value, new_value;

    old_value = *(kmp_int8 *)lhs;
    (*f)(&new_value, &old_value, rhs);

    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ8((kmp_int8 *)lhs, *(kmp_int8 *)&old_value,
                                       *(kmp_int8 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int8 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
    // All 1-byte data is of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_1i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_1i, gtid);
  }
}
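// Illustrative (hypothetical) caller: for an atomic update with no
// specialized entry point, a compiler can outline the operation into a
// callback matching f's contract -- f(result, lhs_value, rhs_value) -- and
// invoke the generic 1-byte entry. The names below are made up for the
// sketch:
//
//   static void op_andb_char(void *out, void *a, void *b) {
//     *(char *)out = (char)(*(char *)a & *(char *)b);
//   }
//   // ... at the atomic construct:
//   //   __kmpc_atomic_1(&loc, gtid, &x, &mask, op_andb_char);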

void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  if (
#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
      FALSE /* must use lock */
#elif KMP_ARCH_X86 || KMP_ARCH_X86_64
      TRUE /* no alignment problems */
#else
      !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */
#endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
      ) {
    kmp_int16 old_value, new_value;

    old_value = *(kmp_int16 *)lhs;
    (*f)(&new_value, &old_value, rhs);

    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ16(
        (kmp_int16 *)lhs, *(kmp_int16 *)&old_value, *(kmp_int16 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int16 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
    // All 2-byte data is of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_2i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_2i, gtid);
  }
}

void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (
// FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
// Gomp compatibility is broken if this routine is called for floats.
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
      TRUE /* no alignment problems */
#else
      !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */
#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
      ) {
    kmp_int32 old_value, new_value;

    old_value = *(kmp_int32 *)lhs;
    (*f)(&new_value, &old_value, rhs);

    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ32(
        (kmp_int32 *)lhs, *(kmp_int32 *)&old_value, *(kmp_int32 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int32 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
    // Use __kmp_atomic_lock_4i for all 4-byte data,
    // even if it isn't of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid);
  }
}

void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (
#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
      FALSE /* must use lock */
#elif KMP_ARCH_X86 || KMP_ARCH_X86_64
      TRUE /* no alignment problems */
#else
      !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */
#endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
      ) {
    kmp_int64 old_value, new_value;

    old_value = *(kmp_int64 *)lhs;
    (*f)(&new_value, &old_value, rhs);
    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ64(
        (kmp_int64 *)lhs, *(kmp_int64 *)&old_value, *(kmp_int64 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int64 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
    // Use __kmp_atomic_lock_8i for all 8-byte data,
    // even if it isn't of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid);
  }
}

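// The remaining generic entries handle payloads (10, 16, 20 and 32 bytes)
// for which no compare-and-store path is attempted here; they always
// serialize the update under the corresponding per-size lock.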
void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
}

void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid);
}

void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_20c, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid);
}

void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_32c, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_32c, gtid);
}

// AC: same two routines as GOMP_atomic_start/end, but these will be called by
// our compiler; duplicated in order to not use third-party names in pure
// Intel code
// TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin.
void __kmpc_atomic_start(void) {
  int gtid = __kmp_entry_gtid();
  KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
}

void __kmpc_atomic_end(void) {
  int gtid = __kmp_get_gtid();
  KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
}
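// Illustrative pairing (sketch): a compiler emits these around an atomic
// region it cannot map onto a specialized entry point, e.g.
//
//   __kmpc_atomic_start();
//   x = f(x); // arbitrary update, serialized by __kmp_atomic_lock
//   __kmpc_atomic_end();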

// end of file