/******************************************************//**
Ensures that the log has been written to the log files (and, when requested,
flushed to disk) at least up to the given lsn; called, e.g., at transaction
commit. Returns at once if that is already the case. If a write is pending
that covers lsn, waits for it as directed by 'wait'. If a write is pending
that does not cover lsn, waits for it to end and retries. Otherwise this
thread writes everything from log_sys->buf_next_to_write up to
log_sys->buf_free itself. Does nothing while recv_no_ibuf_operations
is set. */
UNIV_INTERN
void
log_write_up_to(
/*============*/
	ib_uint64_t	lsn,	/*!< in: log sequence number up to which
				the log should be written,
				IB_ULONGLONG_MAX if not specified */
	ulint		wait,	/*!< in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
				or LOG_WAIT_ALL_GROUPS */
	ibool		flush_to_disk)
				/*!< in: TRUE if we want the written log
				also to be flushed to disk */
{
	log_group_t*	grp;
	ulint		write_begin;	/* first unwritten byte offset in buf */
	ulint		write_end;	/* offset one past the last used byte */
	ulint		blk_begin;	/* write_begin aligned down to a block */
	ulint		blk_end;	/* write_end aligned up to a block */
	ulint		unlock_flags;
#ifdef UNIV_DEBUG
	ulint		n_rounds = 0;
#endif /* UNIV_DEBUG */

	if (recv_no_ibuf_operations) {
		/* Recovery is running and no operations on the log files
		are allowed yet (the variable name .._no_ibuf_.. is
		misleading) */

		return;
	}

retry:
#ifdef UNIV_DEBUG
	n_rounds++;
	ut_ad(n_rounds < 5);

# if 0
	if (n_rounds > 2) {
		fprintf(stderr, "Log loop count %lu\n", n_rounds);
	}
# endif
#endif

	mutex_enter(&log_sys->mutex);
	ut_ad(!recv_no_log_write);

	/* Fast path: the requested lsn has already been handled */
	if (flush_to_disk) {
		if (log_sys->flushed_to_disk_lsn >= lsn) {

			mutex_exit(&log_sys->mutex);

			return;
		}
	} else if (log_sys->written_to_all_lsn >= lsn
		   || (log_sys->written_to_some_lsn >= lsn
		       && wait != LOG_WAIT_ALL_GROUPS)) {

		mutex_exit(&log_sys->mutex);

		return;
	}

	if (log_sys->n_pending_writes > 0) {
		/* A write (+ possibly flush to disk) is running: pick the
		lsn that the running operation will reach for our kind of
		request */
		ib_uint64_t	pending_lsn;

		pending_lsn = flush_to_disk
			? log_sys->current_flush_lsn
			: log_sys->write_lsn;

		if (pending_lsn >= lsn) {
			/* The running operation covers our lsn: wait for
			it to complete */

			goto wait_for_pending;
		}

		mutex_exit(&log_sys->mutex);

		/* Wait for the write to complete and try to start a new
		write */

		os_event_wait(log_sys->no_flush_event);

		goto retry;
	}

	if (!flush_to_disk
	    && log_sys->buf_next_to_write == log_sys->buf_free) {
		/* Nothing to write and no flush to disk requested */

		mutex_exit(&log_sys->mutex);

		return;
	}

#ifdef UNIV_DEBUG
	if (log_debug_writes) {
		fprintf(stderr,
			"Writing log from %llu up to lsn %llu\n",
			log_sys->written_to_all_lsn,
			log_sys->lsn);
	}
#endif /* UNIV_DEBUG */

	/* Register the write that this thread is about to perform */
	log_sys->n_pending_writes++;

	grp = UT_LIST_GET_FIRST(log_sys->log_groups);
	grp->n_pending_writes++;	/*!< We assume here that we have only
					one log group! */

	os_event_reset(log_sys->no_flush_event);
	os_event_reset(log_sys->one_flushed_event);

	/* Compute the block-aligned area of the log buffer to write */
	write_begin = log_sys->buf_next_to_write;
	write_end = log_sys->buf_free;

	blk_begin = ut_calc_align_down(write_begin, OS_FILE_LOG_BLOCK_SIZE);
	blk_end = ut_calc_align(write_end, OS_FILE_LOG_BLOCK_SIZE);

	ut_ad(blk_end - blk_begin > 0);

	log_sys->write_lsn = log_sys->lsn;

	if (flush_to_disk) {
		log_sys->current_flush_lsn = log_sys->lsn;
	}

	log_sys->one_flushed = FALSE;

	/* Stamp the first block with the flush bit and the last block with
	the next checkpoint number */
	log_block_set_flush_bit(log_sys->buf + blk_begin, TRUE);
	log_block_set_checkpoint_no(
		log_sys->buf + blk_end - OS_FILE_LOG_BLOCK_SIZE,
		log_sys->next_checkpoint_no);

	/* Copy the last, incompletely written, log block a log block length
	up, so that when the flush operation writes from the log buffer, the
	segment to write will not be changed by writers to the log */

	ut_memcpy(log_sys->buf + blk_end,
		  log_sys->buf + blk_end - OS_FILE_LOG_BLOCK_SIZE,
		  OS_FILE_LOG_BLOCK_SIZE);

	log_sys->buf_free += OS_FILE_LOG_BLOCK_SIZE;
	log_sys->write_end_offset = log_sys->buf_free;

	/* Do the write to the log files */
	for (grp = UT_LIST_GET_FIRST(log_sys->log_groups);
	     grp;
	     grp = UT_LIST_GET_NEXT(log_groups, grp)) {

		log_group_write_buf(
			grp, log_sys->buf + blk_begin,
			blk_end - blk_begin,
			ut_uint64_align_down(log_sys->written_to_all_lsn,
					     OS_FILE_LOG_BLOCK_SIZE),
			write_begin - blk_begin);

		log_group_set_fields(grp, log_sys->write_lsn);
	}

	mutex_exit(&log_sys->mutex);

	/* The flush below is performed without holding the log mutex */
	if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) {
		/* O_DSYNC means the OS did not buffer the log file at all:
		so we have also flushed to disk what we have written */

		log_sys->flushed_to_disk_lsn = log_sys->write_lsn;

	} else if (flush_to_disk) {

		grp = UT_LIST_GET_FIRST(log_sys->log_groups);

		fil_flush(grp->space_id);
		log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
	}

	mutex_enter(&log_sys->mutex);

	/* Deregister our write; it must have been the only pending one */
	grp = UT_LIST_GET_FIRST(log_sys->log_groups);

	ut_a(grp->n_pending_writes == 1);
	ut_a(log_sys->n_pending_writes == 1);

	grp->n_pending_writes--;
	log_sys->n_pending_writes--;

	/* The group check must run before the log system check */
	unlock_flags = log_group_check_flush_completion(grp);
	unlock_flags |= log_sys_check_flush_completion();

	log_flush_do_unlocks(unlock_flags);

	mutex_exit(&log_sys->mutex);

	return;

wait_for_pending:
	mutex_exit(&log_sys->mutex);

	/* Block on the event matching the requested wait mode; LOG_NO_WAIT
	does not wait at all */
	if (wait == LOG_WAIT_ONE_GROUP) {
		os_event_wait(log_sys->one_flushed_event);
	} else if (wait == LOG_WAIT_ALL_GROUPS) {
		os_event_wait(log_sys->no_flush_event);
	}
#ifdef UNIV_DEBUG
	else if (wait != LOG_NO_WAIT) {
		/* Unknown wait mode */
		ut_error;
	}
#endif /* UNIV_DEBUG */
}
/******************************************************//**
This function is called, e.g., when a transaction wants to commit. It checks
that the log has been written to the log file up to the last log entry written
by the transaction. If there is a flush running, it waits and checks if the
flush flushed enough. If not, starts a new flush. */
UNIV_INTERN
void
log_write_up_to(
/*============*/
	ib_uint64_t	lsn,	/*!< in: log sequence number up to which
				the log should be written,
				IB_ULONGLONG_MAX if not specified */
	ulint		wait,	/*!< in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
				or LOG_WAIT_ALL_GROUPS */
	ibool		flush_to_disk)
				/*!< in: TRUE if we want the written log
				also to be flushed to disk */
{
	log_group_t*	grp;
	ulint		write_begin;	/* first unwritten byte offset in buf */
	ulint		write_end;	/* offset one past the last used byte */
	ulint		blk_begin;	/* write_begin aligned down to a block */
	ulint		blk_end;	/* write_end aligned up to a block */
	ulint		unlock_flags;
#ifdef UNIV_DEBUG
	ulint		n_rounds = 0;
#endif /* UNIV_DEBUG */

	if (recv_no_ibuf_operations) {
		/* Recovery is running and no operations on the log files
		are allowed yet (the variable name .._no_ibuf_.. is
		misleading) */

		return;
	}

retry:
#ifdef UNIV_DEBUG
	n_rounds++;
	ut_ad(n_rounds < 5);

# if 0
	if (n_rounds > 2) {
		fprintf(stderr, "Log loop count %lu\n", n_rounds);
	}
# endif
#endif

	mutex_enter(&log_sys->mutex);
	ut_ad(!recv_no_log_write);

	/* Fast path: the requested lsn has already been handled */
	if (flush_to_disk) {
		if (log_sys->flushed_to_disk_lsn >= lsn) {

			mutex_exit(&log_sys->mutex);

			return;
		}
	} else if (log_sys->written_to_all_lsn >= lsn
		   || (log_sys->written_to_some_lsn >= lsn
		       && wait != LOG_WAIT_ALL_GROUPS)) {

		mutex_exit(&log_sys->mutex);

		return;
	}

	if (log_sys->n_pending_writes > 0) {
		/* A write (+ possibly flush to disk) is running: pick the
		lsn that the running operation will reach for our kind of
		request */
		ib_uint64_t	pending_lsn;

		pending_lsn = flush_to_disk
			? log_sys->current_flush_lsn
			: log_sys->write_lsn;

		if (pending_lsn >= lsn) {
			/* The running operation covers our lsn: wait for
			it to complete */

			goto wait_for_pending;
		}

		mutex_exit(&log_sys->mutex);

		/* Wait for the write to complete and try to start a new
		write */

		os_event_wait(log_sys->no_flush_event);

		goto retry;
	}

	if (!flush_to_disk
	    && log_sys->buf_next_to_write == log_sys->buf_free) {
		/* Nothing to write and no flush to disk requested */

		mutex_exit(&log_sys->mutex);

		return;
	}

#ifdef UNIV_DEBUG
	if (log_debug_writes) {
		fprintf(stderr,
			"Writing log from %llu up to lsn %llu\n",
			log_sys->written_to_all_lsn,
			log_sys->lsn);
	}
#endif /* UNIV_DEBUG */

	/* Register the write that this thread is about to perform */
	log_sys->n_pending_writes++;

	grp = UT_LIST_GET_FIRST(log_sys->log_groups);
	grp->n_pending_writes++;	/*!< We assume here that we have only
					one log group! */

	os_event_reset(log_sys->no_flush_event);
	os_event_reset(log_sys->one_flushed_event);

	/* Compute the block-aligned area of the log buffer to write */
	write_begin = log_sys->buf_next_to_write;
	write_end = log_sys->buf_free;

	blk_begin = ut_calc_align_down(write_begin, OS_FILE_LOG_BLOCK_SIZE);
	blk_end = ut_calc_align(write_end, OS_FILE_LOG_BLOCK_SIZE);

	ut_ad(blk_end - blk_begin > 0);

	log_sys->write_lsn = log_sys->lsn;

	if (flush_to_disk) {
		log_sys->current_flush_lsn = log_sys->lsn;
	}

	log_sys->one_flushed = FALSE;

	/* Stamp the first block with the flush bit and the last block with
	the next checkpoint number */
	log_block_set_flush_bit(log_sys->buf + blk_begin, TRUE);
	log_block_set_checkpoint_no(
		log_sys->buf + blk_end - OS_FILE_LOG_BLOCK_SIZE,
		log_sys->next_checkpoint_no);

	/* Copy the last, incompletely written, log block a log block length
	up, so that when the flush operation writes from the log buffer, the
	segment to write will not be changed by writers to the log */

	ut_memcpy(log_sys->buf + blk_end,
		  log_sys->buf + blk_end - OS_FILE_LOG_BLOCK_SIZE,
		  OS_FILE_LOG_BLOCK_SIZE);

	log_sys->buf_free += OS_FILE_LOG_BLOCK_SIZE;
	log_sys->write_end_offset = log_sys->buf_free;

	/* Do the write to the log files */
	for (grp = UT_LIST_GET_FIRST(log_sys->log_groups);
	     grp;
	     grp = UT_LIST_GET_NEXT(log_groups, grp)) {

		log_group_write_buf(
			grp, log_sys->buf + blk_begin,
			blk_end - blk_begin,
			ut_uint64_align_down(log_sys->written_to_all_lsn,
					     OS_FILE_LOG_BLOCK_SIZE),
			write_begin - blk_begin);

		log_group_set_fields(grp, log_sys->write_lsn);
	}

	mutex_exit(&log_sys->mutex);

	/* The flush below is performed without holding the log mutex */
	if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) {
		/* O_DSYNC means the OS did not buffer the log file at all:
		so we have also flushed to disk what we have written */

		log_sys->flushed_to_disk_lsn = log_sys->write_lsn;

	} else if (flush_to_disk) {

		grp = UT_LIST_GET_FIRST(log_sys->log_groups);

		fil_flush(grp->space_id);
		log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
	}

	mutex_enter(&log_sys->mutex);

	/* Deregister our write; it must have been the only pending one */
	grp = UT_LIST_GET_FIRST(log_sys->log_groups);

	ut_a(grp->n_pending_writes == 1);
	ut_a(log_sys->n_pending_writes == 1);

	grp->n_pending_writes--;
	log_sys->n_pending_writes--;

	/* The group check must run before the log system check */
	unlock_flags = log_group_check_flush_completion(grp);
	unlock_flags |= log_sys_check_flush_completion();

	log_flush_do_unlocks(unlock_flags);

	mutex_exit(&log_sys->mutex);

	return;

wait_for_pending:
	mutex_exit(&log_sys->mutex);

	/* Block on the event matching the requested wait mode; LOG_NO_WAIT
	does not wait at all */
	if (wait == LOG_WAIT_ONE_GROUP) {
		os_event_wait(log_sys->one_flushed_event);
	} else if (wait == LOG_WAIT_ALL_GROUPS) {
		os_event_wait(log_sys->no_flush_event);
	}
#ifdef UNIV_DEBUG
	else if (wait != LOG_NO_WAIT) {
		/* Unknown wait mode */
		ut_error;
	}
#endif /* UNIV_DEBUG */
}