// -*- C++ -*- // Copyright (C) 2007-2024 Free Software Foundation, Inc. // // This file is part of the GNU ISO C++ Library. This library is free // software; you can redistribute it and/or modify it under the terms // of the GNU General Public License as published by the Free Software // Foundation; either version 3, or (at your option) any later // version. // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // General Public License for more details. // Under Section 7 of GPL version 3, you are granted additional // permissions described in the GCC Runtime Library Exception, version // 3.1, as published by the Free Software Foundation. // You should have received a copy of the GNU General Public License and // a copy of the GCC Runtime Library Exception along with this program; // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see // . /** @file parallel/multiway_mergesort.h * @brief Parallel multiway merge sort. * This file is a GNU parallel extension to the Standard C++ Library. */ // Written by Johannes Singler. #ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H #define _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H 1 #include #include #include #include #include namespace __gnu_parallel { /** @brief Subsequence description. */ template struct _Piece { typedef _DifferenceTp _DifferenceType; /** @brief Begin of subsequence. */ _DifferenceType _M_begin; /** @brief End of subsequence. */ _DifferenceType _M_end; }; /** @brief Data accessed by all threads. * * PMWMS = parallel multiway mergesort */ template struct _PMWMSSortingData { typedef std::iterator_traits<_RAIter> _TraitsType; typedef typename _TraitsType::value_type _ValueType; typedef typename _TraitsType::difference_type _DifferenceType; /** @brief Number of threads involved. */ _ThreadIndex _M_num_threads; /** @brief Input __begin. */ _RAIter _M_source; /** @brief Start indices, per thread. */ _DifferenceType* _M_starts; /** @brief Storage in which to sort. */ _ValueType** _M_temporary; /** @brief Samples. */ _ValueType* _M_samples; /** @brief Offsets to add to the found positions. */ _DifferenceType* _M_offsets; /** @brief Pieces of data to merge @c [thread][__sequence] */ std::vector<_Piece<_DifferenceType> >* _M_pieces; }; /** * @brief Select _M_samples from a sequence. * @param __sd Pointer to algorithm data. _Result will be placed in * @c __sd->_M_samples. * @param __num_samples Number of _M_samples to select. */ template void __determine_samples(_PMWMSSortingData<_RAIter>* __sd, _DifferenceTp __num_samples) { typedef std::iterator_traits<_RAIter> _TraitsType; typedef typename _TraitsType::value_type _ValueType; typedef _DifferenceTp _DifferenceType; _ThreadIndex __iam = omp_get_thread_num(); _DifferenceType* __es = new _DifferenceType[__num_samples + 2]; __equally_split(__sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam], __num_samples + 1, __es); for (_DifferenceType __i = 0; __i < __num_samples; ++__i) ::new(&(__sd->_M_samples[__iam * __num_samples + __i])) _ValueType(__sd->_M_source[__sd->_M_starts[__iam] + __es[__i + 1]]); delete[] __es; } /** @brief Split consistently. */ template struct _SplitConsistently { }; /** @brief Split by exact splitting. */ template struct _SplitConsistently { void operator()(const _ThreadIndex __iam, _PMWMSSortingData<_RAIter>* __sd, _Compare& __comp, const typename std::iterator_traits<_RAIter>::difference_type __num_samples) const { # pragma omp barrier std::vector > __seqs(__sd->_M_num_threads); for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++) __seqs[__s] = std::make_pair(__sd->_M_temporary[__s], __sd->_M_temporary[__s] + (__sd->_M_starts[__s + 1] - __sd->_M_starts[__s])); std::vector<_SortingPlacesIterator> __offsets(__sd->_M_num_threads); // if not last thread if (__iam < __sd->_M_num_threads - 1) multiseq_partition(__seqs.begin(), __seqs.end(), __sd->_M_starts[__iam + 1], __offsets.begin(), __comp); for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++) { // for each sequence if (__iam < (__sd->_M_num_threads - 1)) __sd->_M_pieces[__iam][__seq]._M_end = __offsets[__seq] - __seqs[__seq].first; else // very end of this sequence __sd->_M_pieces[__iam][__seq]._M_end = __sd->_M_starts[__seq + 1] - __sd->_M_starts[__seq]; } # pragma omp barrier for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++) { // For each sequence. if (__iam > 0) __sd->_M_pieces[__iam][__seq]._M_begin = __sd->_M_pieces[__iam - 1][__seq]._M_end; else // Absolute beginning. __sd->_M_pieces[__iam][__seq]._M_begin = 0; } } }; /** @brief Split by sampling. */ template struct _SplitConsistently { void operator()(const _ThreadIndex __iam, _PMWMSSortingData<_RAIter>* __sd, _Compare& __comp, const typename std::iterator_traits<_RAIter>::difference_type __num_samples) const { typedef std::iterator_traits<_RAIter> _TraitsType; typedef typename _TraitsType::value_type _ValueType; typedef typename _TraitsType::difference_type _DifferenceType; __determine_samples(__sd, __num_samples); # pragma omp barrier # pragma omp single __gnu_sequential::sort(__sd->_M_samples, __sd->_M_samples + (__num_samples * __sd->_M_num_threads), __comp); # pragma omp barrier for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s) { // For each sequence. if (__num_samples * __iam > 0) __sd->_M_pieces[__iam][__s]._M_begin = std::lower_bound(__sd->_M_temporary[__s], __sd->_M_temporary[__s] + (__sd->_M_starts[__s + 1] - __sd->_M_starts[__s]), __sd->_M_samples[__num_samples * __iam], __comp) - __sd->_M_temporary[__s]; else // Absolute beginning. __sd->_M_pieces[__iam][__s]._M_begin = 0; if ((__num_samples * (__iam + 1)) < (__num_samples * __sd->_M_num_threads)) __sd->_M_pieces[__iam][__s]._M_end = std::lower_bound(__sd->_M_temporary[__s], __sd->_M_temporary[__s] + (__sd->_M_starts[__s + 1] - __sd->_M_starts[__s]), __sd->_M_samples[__num_samples * (__iam + 1)], __comp) - __sd->_M_temporary[__s]; else // Absolute end. __sd->_M_pieces[__iam][__s]._M_end = (__sd->_M_starts[__s + 1] - __sd->_M_starts[__s]); } } }; template struct __possibly_stable_sort { }; template struct __possibly_stable_sort { void operator()(const _RAIter& __begin, const _RAIter& __end, _Compare& __comp) const { __gnu_sequential::stable_sort(__begin, __end, __comp); } }; template struct __possibly_stable_sort { void operator()(const _RAIter __begin, const _RAIter __end, _Compare& __comp) const { __gnu_sequential::sort(__begin, __end, __comp); } }; template struct __possibly_stable_multiway_merge { }; template struct __possibly_stable_multiway_merge { void operator()(const _Seq_RAIter& __seqs_begin, const _Seq_RAIter& __seqs_end, const _RAIter& __target, _Compare& __comp, _DiffType __length_am) const { stable_multiway_merge(__seqs_begin, __seqs_end, __target, __length_am, __comp, sequential_tag()); } }; template struct __possibly_stable_multiway_merge { void operator()(const _Seq_RAIter& __seqs_begin, const _Seq_RAIter& __seqs_end, const _RAIter& __target, _Compare& __comp, _DiffType __length_am) const { multiway_merge(__seqs_begin, __seqs_end, __target, __length_am, __comp, sequential_tag()); } }; /** @brief PMWMS code executed by each thread. * @param __sd Pointer to algorithm data. * @param __comp Comparator. */ template void parallel_sort_mwms_pu(_PMWMSSortingData<_RAIter>* __sd, _Compare& __comp) { typedef std::iterator_traits<_RAIter> _TraitsType; typedef typename _TraitsType::value_type _ValueType; typedef typename _TraitsType::difference_type _DifferenceType; _ThreadIndex __iam = omp_get_thread_num(); // Length of this thread's chunk, before merging. _DifferenceType __length_local = __sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam]; // Sort in temporary storage, leave space for sentinel. typedef _ValueType* _SortingPlacesIterator; __sd->_M_temporary[__iam] = static_cast<_ValueType*>(::operator new(sizeof(_ValueType) * (__length_local + 1))); // Copy there. std::uninitialized_copy(__sd->_M_source + __sd->_M_starts[__iam], __sd->_M_source + __sd->_M_starts[__iam] + __length_local, __sd->_M_temporary[__iam]); __possibly_stable_sort<__stable, _SortingPlacesIterator, _Compare>() (__sd->_M_temporary[__iam], __sd->_M_temporary[__iam] + __length_local, __comp); // Invariant: locally sorted subsequence in sd->_M_temporary[__iam], // __sd->_M_temporary[__iam] + __length_local. // No barrier here: Synchronization is done by the splitting routine. _DifferenceType __num_samples = _Settings::get().sort_mwms_oversampling * __sd->_M_num_threads - 1; _SplitConsistently<__exact, _RAIter, _Compare, _SortingPlacesIterator>() (__iam, __sd, __comp, __num_samples); // Offset from __target __begin, __length after merging. _DifferenceType __offset = 0, __length_am = 0; for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++) { __length_am += (__sd->_M_pieces[__iam][__s]._M_end - __sd->_M_pieces[__iam][__s]._M_begin); __offset += __sd->_M_pieces[__iam][__s]._M_begin; } typedef std::vector< std::pair<_SortingPlacesIterator, _SortingPlacesIterator> > _SeqVector; _SeqVector __seqs(__sd->_M_num_threads); for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s) { __seqs[__s] = std::make_pair(__sd->_M_temporary[__s] + __sd->_M_pieces[__iam][__s]._M_begin, __sd->_M_temporary[__s] + __sd->_M_pieces[__iam][__s]._M_end); } __possibly_stable_multiway_merge< __stable, typename _SeqVector::iterator, _RAIter, _Compare, _DifferenceType>()(__seqs.begin(), __seqs.end(), __sd->_M_source + __offset, __comp, __length_am); # pragma omp barrier for (_DifferenceType __i = 0; __i < __length_local; ++__i) __sd->_M_temporary[__iam][__i].~_ValueType(); ::operator delete(__sd->_M_temporary[__iam]); } /** @brief PMWMS main call. * @param __begin Begin iterator of sequence. * @param __end End iterator of sequence. * @param __comp Comparator. * @param __num_threads Number of threads to use. */ template void parallel_sort_mwms(_RAIter __begin, _RAIter __end, _Compare __comp, _ThreadIndex __num_threads) { _GLIBCXX_CALL(__end - __begin) typedef std::iterator_traits<_RAIter> _TraitsType; typedef typename _TraitsType::value_type _ValueType; typedef typename _TraitsType::difference_type _DifferenceType; _DifferenceType __n = __end - __begin; if (__n <= 1) return; // at least one element per thread if (__num_threads > __n) __num_threads = static_cast<_ThreadIndex>(__n); // shared variables _PMWMSSortingData<_RAIter> __sd; _DifferenceType* __starts; _DifferenceType __size; # pragma omp parallel num_threads(__num_threads) { __num_threads = omp_get_num_threads(); //no more threads than requested # pragma omp single { __sd._M_num_threads = __num_threads; __sd._M_source = __begin; __sd._M_temporary = new _ValueType*[__num_threads]; if (!__exact) { __size = (_Settings::get().sort_mwms_oversampling * __num_threads - 1) * __num_threads; __sd._M_samples = static_cast<_ValueType*> (::operator new(__size * sizeof(_ValueType))); } else __sd._M_samples = 0; __sd._M_offsets = new _DifferenceType[__num_threads - 1]; __sd._M_pieces = new std::vector<_Piece<_DifferenceType> >[__num_threads]; for (_ThreadIndex __s = 0; __s < __num_threads; ++__s) __sd._M_pieces[__s].resize(__num_threads); __starts = __sd._M_starts = new _DifferenceType[__num_threads + 1]; _DifferenceType __chunk_length = __n / __num_threads; _DifferenceType __split = __n % __num_threads; _DifferenceType __pos = 0; for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) { __starts[__i] = __pos; __pos += ((__i < __split) ? (__chunk_length + 1) : __chunk_length); } __starts[__num_threads] = __pos; } //single // Now sort in parallel. parallel_sort_mwms_pu<__stable, __exact>(&__sd, __comp); } //parallel delete[] __starts; delete[] __sd._M_temporary; if (!__exact) { for (_DifferenceType __i = 0; __i < __size; ++__i) __sd._M_samples[__i].~_ValueType(); ::operator delete(__sd._M_samples); } delete[] __sd._M_offsets; delete[] __sd._M_pieces; } } //namespace __gnu_parallel #endif /* _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H */