| 1 | /***************************************************************************** |
| 2 | |
| 3 | Copyright (c) 2013, 2015, Oracle and/or its affiliates. All Rights Reserved. |
| 4 | |
| 5 | This program is free software; you can redistribute it and/or modify it under |
| 6 | the terms of the GNU General Public License as published by the Free Software |
| 7 | Foundation; version 2 of the License. |
| 8 | |
| 9 | This program is distributed in the hope that it will be useful, but WITHOUT |
| 10 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| 11 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. |
| 12 | |
| 13 | You should have received a copy of the GNU General Public License along with |
| 14 | this program; if not, write to the Free Software Foundation, Inc., |
| 15 | 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA |
| 16 | |
| 17 | *****************************************************************************/ |
| 18 | |
| 19 | /**************************************************//** |
| 20 | @file os/os0event.cc |
| 21 | The interface to the operating system condition variables. |
| 22 | |
| 23 | Created 2012-09-23 Sunny Bains |
| 24 | *******************************************************/ |
| 25 | |
| 26 | #include "os0event.h" |
| 27 | #include "ut0mutex.h" |
| 28 | #include "ha_prototypes.h" |
| 29 | #include "ut0new.h" |
| 30 | |
| 31 | #ifdef _WIN32 |
| 32 | #include <windows.h> |
| 33 | #include <synchapi.h> |
| 34 | #endif /* _WIN32 */ |
| 35 | |
| 36 | #include <list> |
| 37 | |
| 38 | #ifdef _WIN32 |
| 39 | /** Native condition variable. */ |
| 40 | typedef CONDITION_VARIABLE os_cond_t; |
| 41 | #else |
| 42 | /** Native condition variable */ |
| 43 | typedef pthread_cond_t os_cond_t; |
| 44 | #endif /* _WIN32 */ |
| 45 | |
| 46 | typedef std::list<os_event_t, ut_allocator<os_event_t> > os_event_list_t; |
| 47 | typedef os_event_list_t::iterator event_iter_t; |
| 48 | |
/** InnoDB condition variable. A manual-reset event built from a native
condition variable plus a mutex, with a generation counter (signal_count)
that lets callers detect a set() that happened between their reset() and
their wait (see the wait_low() comment below). */
struct os_event {
	os_event() UNIV_NOTHROW;

	~os_event() UNIV_NOTHROW;

	/**
	Destroys a condition variable */
	void destroy() UNIV_NOTHROW
	{
#ifndef _WIN32
		int	ret = pthread_cond_destroy(&cond_var);
		ut_a(ret == 0);
#endif /* !_WIN32 */

		/* On Windows a CONDITION_VARIABLE needs no explicit
		destruction; only the mutex must be torn down. */
		mutex.destroy();
	}

	/** Set the event to the signalled state and wake all waiters.
	A no-op if the event is already signalled, so signal_count is
	only incremented on an actual state transition (broadcast()
	sets m_set and bumps signal_count). */
	void set() UNIV_NOTHROW
	{
		mutex.enter();

		if (!m_set) {
			broadcast();
		}

		mutex.exit();
	}

	/** Return the event to the non-signalled state.
	@return the current signal_count; pass it as reset_sig_count to
	wait_low()/wait_time_low() to avoid missing an intervening set() */
	int64_t reset() UNIV_NOTHROW
	{
		mutex.enter();

		if (m_set) {
			m_set = false;
		}

		int64_t	ret = signal_count;

		mutex.exit();

		return(ret);
	}

	/**
	Waits for an event object until it is in the signaled state.

	Typically, if the event has been signalled after the os_event_reset()
	we'll return immediately because event->m_set == true.
	There are, however, situations (e.g.: sync_array code) where we may
	lose this information. For example:

	thread A calls os_event_reset()
	thread B calls os_event_set()   [event->m_set == true]
	thread C calls os_event_reset() [event->m_set == false]
	thread A calls os_event_wait()  [infinite wait!]
	thread C calls os_event_wait()  [infinite wait!]

	Where such a scenario is possible, to avoid infinite wait, the
	value returned by reset() should be passed in as
	reset_sig_count. */
	void wait_low(int64_t reset_sig_count) UNIV_NOTHROW;

	/**
	Waits for an event object until it is in the signaled state or
	a timeout is exceeded.
	@param time_in_usec - timeout in microseconds,
			or OS_SYNC_INFINITE_TIME
	@param reset_sig_count- zero or the value returned by
			previous call of os_event_reset().
	@return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */
	ulint wait_time_low(
		ulint		time_in_usec,
		int64_t		reset_sig_count) UNIV_NOTHROW;

	/** @return true if the event is in the signalled state.
	Read without holding the mutex; only a hint for callers. */
	bool is_set() const UNIV_NOTHROW
	{
		return(m_set);
	}

private:
	/**
	Initialize a condition variable */
	void init() UNIV_NOTHROW
	{

		mutex.init();

#ifdef _WIN32
		InitializeConditionVariable(&cond_var);
#else
		{
			int	ret;

			ret = pthread_cond_init(&cond_var, NULL);
			ut_a(ret == 0);
		}
#endif /* _WIN32 */
	}

	/**
	Wait on condition variable. The caller must hold `mutex`; it is
	atomically released while waiting and re-acquired before return. */
	void wait() UNIV_NOTHROW
	{
#ifdef _WIN32
		if (!SleepConditionVariableCS(&cond_var, mutex, INFINITE)) {
			ut_error;
		}
#else
		{
			int	ret;

			ret = pthread_cond_wait(&cond_var, mutex);
			ut_a(ret == 0);
		}
#endif /* _WIN32 */
	}

	/**
	Wakes all threads waiting for condition variable.
	Also flips the event to signalled and advances signal_count;
	the caller must hold `mutex`. */
	void broadcast() UNIV_NOTHROW
	{
		m_set = true;
		++signal_count;

#ifdef _WIN32
		WakeAllConditionVariable(&cond_var);
#else
		{
			int	ret;

			ret = pthread_cond_broadcast(&cond_var);
			ut_a(ret == 0);
		}
#endif /* _WIN32 */
	}

	/**
	Wakes one thread waiting for condition variable.
	Note: does not touch m_set/signal_count, unlike broadcast(). */
	void signal() UNIV_NOTHROW
	{
#ifdef _WIN32
		WakeConditionVariable(&cond_var);
#else
		{
			int	ret;

			ret = pthread_cond_signal(&cond_var);
			ut_a(ret == 0);
		}
#endif /* _WIN32 */
	}

	/**
	Do a timed wait on condition variable.
	@param abstime - absolute wakeup time (POSIX builds)
	@param time_in_ms - relative timeout in milliseconds (Windows builds)
	@return true if timed out, false otherwise */
	bool timed_wait(
#ifndef _WIN32
		const timespec*	abstime
#else
		DWORD		time_in_ms
#endif /* !_WIN32 */
	);

private:
	bool		m_set;		/*!< this is true when the
					event is in the signaled
					state, i.e., a thread does
					not stop if it tries to wait
					for this event */
	int64_t		signal_count;	/*!< this is incremented
					each time the event becomes
					signaled */
	EventMutex	mutex;		/*!< protects m_set and
					signal_count above, and
					serializes the condition
					variable waits */


	os_cond_t	cond_var;	/*!< condition variable is
					used in waiting for the event */

public:
	event_iter_t	event_iter;	/*!< For O(1) removal from
					list */
protected:
	// Disable copying
	os_event(const os_event&);
	os_event& operator=(const os_event&);
};
| 241 | |
| 242 | /** |
| 243 | Do a timed wait on condition variable. |
| 244 | @param abstime - absolute time to wait |
| 245 | @param time_in_ms - timeout in milliseconds |
| 246 | @return true if timed out */ |
| 247 | bool |
| 248 | os_event::timed_wait( |
| 249 | #ifndef _WIN32 |
| 250 | const timespec* abstime |
| 251 | #else |
| 252 | DWORD time_in_ms |
| 253 | #endif /* !_WIN32 */ |
| 254 | ) |
| 255 | { |
| 256 | #ifdef _WIN32 |
| 257 | BOOL ret; |
| 258 | |
| 259 | ret = SleepConditionVariableCS(&cond_var, mutex, time_in_ms); |
| 260 | |
| 261 | if (!ret) { |
| 262 | DWORD err = GetLastError(); |
| 263 | |
| 264 | /* FQDN=msdn.microsoft.com |
| 265 | @see http://$FQDN/en-us/library/ms686301%28VS.85%29.aspx, |
| 266 | |
| 267 | "Condition variables are subject to spurious wakeups |
| 268 | (those not associated with an explicit wake) and stolen wakeups |
| 269 | (another thread manages to run before the woken thread)." |
| 270 | Check for both types of timeouts. |
| 271 | Conditions are checked by the caller.*/ |
| 272 | if (err == WAIT_TIMEOUT || err == ERROR_TIMEOUT) { |
| 273 | return(true); |
| 274 | } |
| 275 | } |
| 276 | |
| 277 | ut_a(ret); |
| 278 | |
| 279 | return(false); |
| 280 | #else |
| 281 | int ret; |
| 282 | |
| 283 | ret = pthread_cond_timedwait(&cond_var, mutex, abstime); |
| 284 | |
| 285 | switch (ret) { |
| 286 | case 0: |
| 287 | case ETIMEDOUT: |
| 288 | /* We play it safe by checking for EINTR even though |
| 289 | according to the POSIX documentation it can't return EINTR. */ |
| 290 | case EINTR: |
| 291 | break; |
| 292 | |
| 293 | default: |
| 294 | ib::error() << "pthread_cond_timedwait() returned: " << ret |
| 295 | << ": abstime={" << abstime->tv_sec << "," |
| 296 | << abstime->tv_nsec << "}" ; |
| 297 | ut_error; |
| 298 | } |
| 299 | |
| 300 | return(ret == ETIMEDOUT); |
| 301 | #endif /* _WIN32 */ |
| 302 | } |
| 303 | |
| 304 | /** |
| 305 | Waits for an event object until it is in the signaled state. |
| 306 | |
| 307 | Typically, if the event has been signalled after the os_event_reset() |
| 308 | we'll return immediately because event->m_set == true. |
| 309 | There are, however, situations (e.g.: sync_array code) where we may |
| 310 | lose this information. For example: |
| 311 | |
| 312 | thread A calls os_event_reset() |
| 313 | thread B calls os_event_set() [event->m_set == true] |
| 314 | thread C calls os_event_reset() [event->m_set == false] |
| 315 | thread A calls os_event_wait() [infinite wait!] |
| 316 | thread C calls os_event_wait() [infinite wait!] |
| 317 | |
| 318 | Where such a scenario is possible, to avoid infinite wait, the |
| 319 | value returned by reset() should be passed in as |
| 320 | reset_sig_count. */ |
| 321 | void |
| 322 | os_event::wait_low( |
| 323 | int64_t reset_sig_count) UNIV_NOTHROW |
| 324 | { |
| 325 | mutex.enter(); |
| 326 | |
| 327 | if (!reset_sig_count) { |
| 328 | reset_sig_count = signal_count; |
| 329 | } |
| 330 | |
| 331 | while (!m_set && signal_count == reset_sig_count) { |
| 332 | |
| 333 | wait(); |
| 334 | |
| 335 | /* Spurious wakeups may occur: we have to check if the |
| 336 | event really has been signaled after we came here to wait. */ |
| 337 | } |
| 338 | |
| 339 | mutex.exit(); |
| 340 | } |
| 341 | |
| 342 | /** |
| 343 | Waits for an event object until it is in the signaled state or |
| 344 | a timeout is exceeded. |
| 345 | @param time_in_usec - timeout in microseconds, or OS_SYNC_INFINITE_TIME |
| 346 | @param reset_sig_count - zero or the value returned by previous call |
| 347 | of os_event_reset(). |
| 348 | @return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */ |
| 349 | ulint |
| 350 | os_event::wait_time_low( |
| 351 | ulint time_in_usec, |
| 352 | int64_t reset_sig_count) UNIV_NOTHROW |
| 353 | { |
| 354 | bool timed_out = false; |
| 355 | |
| 356 | #ifdef _WIN32 |
| 357 | DWORD time_in_ms; |
| 358 | |
| 359 | if (time_in_usec != OS_SYNC_INFINITE_TIME) { |
| 360 | time_in_ms = DWORD(time_in_usec / 1000); |
| 361 | } else { |
| 362 | time_in_ms = INFINITE; |
| 363 | } |
| 364 | #else |
| 365 | struct timespec abstime; |
| 366 | |
| 367 | if (time_in_usec != OS_SYNC_INFINITE_TIME) { |
| 368 | struct timeval tv; |
| 369 | int ret; |
| 370 | ulint sec; |
| 371 | ulint usec; |
| 372 | |
| 373 | ret = ut_usectime(&sec, &usec); |
| 374 | ut_a(ret == 0); |
| 375 | |
| 376 | tv.tv_sec = sec; |
| 377 | tv.tv_usec = usec; |
| 378 | |
| 379 | tv.tv_usec += time_in_usec; |
| 380 | |
| 381 | abstime.tv_sec = tv.tv_sec + tv.tv_usec / 1000000; |
| 382 | abstime.tv_nsec = tv.tv_usec % 1000000 * 1000; |
| 383 | } else { |
| 384 | abstime.tv_nsec = 999999999; |
| 385 | abstime.tv_sec = (time_t) ULINT_MAX; |
| 386 | } |
| 387 | |
| 388 | ut_a(abstime.tv_nsec <= 999999999); |
| 389 | |
| 390 | #endif /* _WIN32 */ |
| 391 | |
| 392 | mutex.enter(); |
| 393 | |
| 394 | if (!reset_sig_count) { |
| 395 | reset_sig_count = signal_count; |
| 396 | } |
| 397 | |
| 398 | do { |
| 399 | if (m_set || signal_count != reset_sig_count) { |
| 400 | |
| 401 | break; |
| 402 | } |
| 403 | |
| 404 | #ifndef _WIN32 |
| 405 | timed_out = timed_wait(&abstime); |
| 406 | #else |
| 407 | timed_out = timed_wait(time_in_ms); |
| 408 | #endif /* !_WIN32 */ |
| 409 | |
| 410 | } while (!timed_out); |
| 411 | |
| 412 | mutex.exit(); |
| 413 | |
| 414 | return(timed_out ? OS_SYNC_TIME_EXCEEDED : 0); |
| 415 | } |
| 416 | |
| 417 | /** Constructor */ |
| 418 | os_event::os_event() UNIV_NOTHROW |
| 419 | { |
| 420 | init(); |
| 421 | |
| 422 | m_set = false; |
| 423 | |
| 424 | /* We return this value in os_event_reset(), |
| 425 | which can then be be used to pass to the |
| 426 | os_event_wait_low(). The value of zero is |
| 427 | reserved in os_event_wait_low() for the case |
| 428 | when the caller does not want to pass any |
| 429 | signal_count value. To distinguish between |
| 430 | the two cases we initialize signal_count |
| 431 | to 1 here. */ |
| 432 | |
| 433 | signal_count = 1; |
| 434 | } |
| 435 | |
/** Destructor: releases the condition variable and the mutex. */
os_event::~os_event() UNIV_NOTHROW
{
	destroy();
}
| 441 | |
| 442 | /** |
| 443 | Creates an event semaphore, i.e., a semaphore which may just have two |
| 444 | states: signaled and nonsignaled. The created event is manual reset: it |
| 445 | must be reset explicitly by calling sync_os_reset_event. |
| 446 | @return the event handle */ |
| 447 | os_event_t os_event_create(const char*) |
| 448 | { |
| 449 | return(UT_NEW_NOKEY(os_event())); |
| 450 | } |
| 451 | |
| 452 | /** |
| 453 | Check if the event is set. |
| 454 | @return true if set */ |
| 455 | bool |
| 456 | os_event_is_set( |
| 457 | /*============*/ |
| 458 | const os_event_t event) /*!< in: event to test */ |
| 459 | { |
| 460 | return(event->is_set()); |
| 461 | } |
| 462 | |
| 463 | /** |
| 464 | Sets an event semaphore to the signaled state: lets waiting threads |
| 465 | proceed. */ |
| 466 | void |
| 467 | os_event_set( |
| 468 | /*=========*/ |
| 469 | os_event_t event) /*!< in/out: event to set */ |
| 470 | { |
| 471 | event->set(); |
| 472 | } |
| 473 | |
| 474 | /** |
| 475 | Resets an event semaphore to the nonsignaled state. Waiting threads will |
| 476 | stop to wait for the event. |
| 477 | The return value should be passed to os_even_wait_low() if it is desired |
| 478 | that this thread should not wait in case of an intervening call to |
| 479 | os_event_set() between this os_event_reset() and the |
| 480 | os_event_wait_low() call. See comments for os_event_wait_low(). |
| 481 | @return current signal_count. */ |
| 482 | int64_t |
| 483 | os_event_reset( |
| 484 | /*===========*/ |
| 485 | os_event_t event) /*!< in/out: event to reset */ |
| 486 | { |
| 487 | return(event->reset()); |
| 488 | } |
| 489 | |
| 490 | /** |
| 491 | Waits for an event object until it is in the signaled state or |
| 492 | a timeout is exceeded. |
| 493 | @return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */ |
| 494 | ulint |
| 495 | os_event_wait_time_low( |
| 496 | /*===================*/ |
| 497 | os_event_t event, /*!< in/out: event to wait */ |
| 498 | ulint time_in_usec, /*!< in: timeout in |
| 499 | microseconds, or |
| 500 | OS_SYNC_INFINITE_TIME */ |
| 501 | int64_t reset_sig_count) /*!< in: zero or the value |
| 502 | returned by previous call of |
| 503 | os_event_reset(). */ |
| 504 | { |
| 505 | return(event->wait_time_low(time_in_usec, reset_sig_count)); |
| 506 | } |
| 507 | |
| 508 | /** |
| 509 | Waits for an event object until it is in the signaled state. |
| 510 | |
| 511 | Where such a scenario is possible, to avoid infinite wait, the |
| 512 | value returned by os_event_reset() should be passed in as |
| 513 | reset_sig_count. */ |
| 514 | void |
| 515 | os_event_wait_low( |
| 516 | /*==============*/ |
| 517 | os_event_t event, /*!< in: event to wait */ |
| 518 | int64_t reset_sig_count) /*!< in: zero or the value |
| 519 | returned by previous call of |
| 520 | os_event_reset(). */ |
| 521 | { |
| 522 | event->wait_low(reset_sig_count); |
| 523 | } |
| 524 | |
| 525 | /** |
| 526 | Frees an event object. */ |
| 527 | void |
| 528 | os_event_destroy( |
| 529 | /*=============*/ |
| 530 | os_event_t& event) /*!< in/own: event to free */ |
| 531 | |
| 532 | { |
| 533 | if (event != NULL) { |
| 534 | UT_DELETE(event); |
| 535 | event = NULL; |
| 536 | } |
| 537 | } |
| 538 | |