MySQL Source Code Walkthrough: The INSERT Flow

1. The Insert Flow

Having analyzed the overall Sql_cmd flow, we can now dig into what inserting data actually involves. Queries are the most common database operation, but inserts are not far behind. INSERT is a DML statement, so we again start from sql_insert.cc, the file covered in the previous article, and follow how its virtual execute function runs, from that entry point all the way until the data reaches disk. Roughly, the steps are:
1. Open the target table and perform the various preparatory checks, including distinguishing INSERT ... SELECT from a plain INSERT.
2. Lock the table and decide whether to start a bulk insert.
3. Write the record into the table's buffer; this is where execution enters the storage engine layer.
4. Commit the transaction, which in some cases involves a two-phase commit.
5. Flush to disk and commit the logs, honoring the WAL (write-ahead logging) rule.
The sections below analyze each step in detail.
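
Condensed into a call chain, the route we will follow looks like this (my own roadmap; every function named here is examined in this article):

Sql_cmd_dml::execute()                        // common DML dispatch
  -> Sql_cmd_insert_values::execute_inner()   // sql_insert.cc
    -> write_record()                         // per-row write, duplicate handling
      -> handler::ha_write_row()              // sql/handler.cc, engine boundary
        -> ha_innobase::write_row()           // ha_innodb.cc (InnoDB)
          -> row_insert_for_mysql()           // row0mysql.cc
            -> row_ins_step() -> row_ins()    // the INSERT graph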

2. Source Code Analysis

From the earlier analysis we know that execution reaches the common dispatch point (insert, select and the rest all go through the same call), bool Sql_cmd_dml::execute(), which in turn calls execute_inner. Before jumping into that code, it is worth seeing how many classes sql_insert.h defines. Quite a few, it turns out: Query_result_insert, Query_result_create, Sql_cmd_insert_base, Sql_cmd_insert_values and Sql_cmd_insert_select:

class Alter_info;
class Field;
class Item;
class Query_expression;
class THD;
struct HA_CREATE_INFO;
struct handlerton;

using List_item = mem_root_deque<Item *>;
struct MYSQL_LOCK;

bool check_that_all_fields_are_given_values(THD *thd, TABLE *entry,
                                            TABLE_LIST *table_list);
void prepare_triggers_for_insert_stmt(THD *thd, TABLE *table);
bool write_record(THD *thd, TABLE *table, COPY_INFO *info, COPY_INFO *update);
bool validate_default_values_of_unset_fields(THD *thd, TABLE *table);

class Query_result_insert : public Query_result_interceptor {
 public:
  /// The table used for insertion of rows
  TABLE_LIST *table_list;
  TABLE *table{nullptr};

 private:
  /**
     The columns of the table to be inserted into, *or* the columns of the
     table from which values are selected. For legacy reasons both are
     allowed.
   */
  mem_root_deque<Item *> *fields;

 protected:
  /// ha_start_bulk_insert has been called. Never cleared.
  bool bulk_insert_started{false};

 public:
  ulonglong autoinc_value_of_last_inserted_row{0};  // autogenerated or not
  COPY_INFO info;
  COPY_INFO update;  ///< the UPDATE part of "info"
  bool insert_into_view;

  Query_result_insert(TABLE_LIST *table_list_par,
                      mem_root_deque<Item *> *target_columns,
                      mem_root_deque<Item *> *target_or_source_columns,
                      mem_root_deque<Item *> *update_fields,
                      mem_root_deque<Item *> *update_values,
                      enum_duplicates duplic)
      : Query_result_interceptor(),
        table_list(table_list_par),
        fields(target_or_source_columns),
        info(COPY_INFO::INSERT_OPERATION, target_columns,
             // manage_defaults
             (target_columns == nullptr || !target_columns->empty()), duplic),
        update(COPY_INFO::UPDATE_OPERATION, update_fields, update_values),
        insert_into_view(table_list_par && table_list_par->is_view()) {
    assert(target_or_source_columns != nullptr);
    assert(target_columns == target_or_source_columns ||
           target_columns == nullptr);
  }

 public:
  bool need_explain_interceptor() const override { return true; }
  bool prepare(THD *thd, const mem_root_deque<Item *> &list,
               Query_expression *u) override;
  bool start_execution(THD *thd) override;
  bool send_data(THD *thd, const mem_root_deque<Item *> &items) override;
  virtual void store_values(THD *thd, const mem_root_deque<Item *> &values);
  void send_error(THD *thd, uint errcode, const char *err) override;
  bool send_eof(THD *thd) override;
  void abort_result_set(THD *thd) override;
  void cleanup(THD *thd) override;

 private:
  /**
    Indicates whether this statement should be written to binary log's
    transactional cache in statement mode.
  */
  virtual bool stmt_binlog_is_trans() const;
};

/**
   @todo This class inherits a class which is non-abstract. This is not in
   line with good programming practices and the inheritance should be broken
   up.
*/
class Query_result_create final : public Query_result_insert {
  /// Handle for table to be created
  TABLE_LIST *create_table;
  /// Contains further information for table creation
  HA_CREATE_INFO *create_info{nullptr};
  /// Contains further information for table creation
  Alter_info *alter_info{nullptr};
  Field **field;
  /// List of tables that are select from
  TABLE_LIST *select_tables;
  /// Pointer to first field in table generated from query expression
  Field **table_fields{nullptr};
  /// lock data for tmp table
  MYSQL_LOCK *m_lock{nullptr};
  /// m_lock or thd->extra_lock
  MYSQL_LOCK **m_plock{nullptr};
  /**
    If table being created has SE supporting atomic DDL, pointer to SE's
    handlerton object to be used for calling SE post-DDL hook, nullptr -
    otherwise.
  */
  handlerton *m_post_ddl_ht{nullptr};

 public:
  Query_result_create(TABLE_LIST *create_table_arg,
                      mem_root_deque<Item *> *fields, enum_duplicates duplic,
                      TABLE_LIST *select_tables_arg);

  bool prepare(THD *thd, const mem_root_deque<Item *> &list,
               Query_expression *u) override;
  void store_values(THD *thd, const mem_root_deque<Item *> &values) override;
  void send_error(THD *thd, uint errcode, const char *err) override;
  bool send_eof(THD *thd) override;
  void abort_result_set(THD *thd) override;
  bool create_table_for_query_block(THD *thd) override;
  bool start_execution(THD *thd) override;
  void set_two_fields(HA_CREATE_INFO *create_info_arg,
                      Alter_info *alter_info_arg) {
    create_info = create_info_arg;
    alter_info = alter_info_arg;
  }

 private:
  bool stmt_binlog_is_trans() const override;
  int binlog_show_create_table(THD *thd);
  void drop_open_table(THD *thd);
};

/**
  Base class for all INSERT and REPLACE statements. Abstract class that
  is inherited by Sql_cmd_insert_values and Sql_cmd_insert_select.
*/

class Sql_cmd_insert_base : public Sql_cmd_dml {
 protected:
  bool precheck(THD *thd) override;
  bool check_privileges(THD *thd) override;
  bool prepare_inner(THD *thd) override;
  bool restore_cmd_properties(THD *thd) override;

 private:
  bool resolve_update_expressions(THD *thd);
  bool prepare_values_table(THD *thd);
  bool resolve_values_table_columns(THD *thd);
  bool get_default_columns(THD *thd, TABLE *table,
                           MY_BITMAP **m_function_default_columns);

 protected:
  /// true when REPLACE statement, false when INSERT statement
  const bool is_replace;

 public:
  /**
    Field list to insert/replace

    One of two things:
    1. For the INSERT/REPLACE ... (col1, ... colN) VALUES ... syntax
       this is a list of col1, ..., colN fields.
    2. For the INSERT/REPLACE ... SET col1=x1, ... colM=xM syntax extension
       this is a list of col1, ... colM fields as well.
  */
  mem_root_deque<Item *> insert_field_list;
  /**
    Row data to insert/replace

    One of two things:
    1. For the INSERT/REPLACE ... VALUES (row1), (row2), ... (rowN) syntax
       the list contains N List_item lists: one List_item per row.
    2. For the INSERT/REPLACE ... SET col1=x1, ... colM=xM syntax extension
       this list contains only 1 List_item of M data values: this way we
       emulate this syntax:
         INSERT/REPLACE ... (col1, ... colM) VALUE (x1, ..., xM);
  */
  mem_root_deque<List_item *> insert_many_values;

  /// True if VALUES clause contain column references that need privilege check
  bool values_need_privilege_check{false};

  /// Number of columns in original insert column list
  uint column_count;

  /// Number of values per row in insert_many_values, available after resolving
  uint value_count;

  /// ON DUPLICATE KEY UPDATE field list
  mem_root_deque<Item *> update_field_list;

  /// ON DUPLICATE KEY UPDATE data value list
  mem_root_deque<Item *> update_value_list;

  /**
    ON DUPLICATE KEY UPDATE reference to VALUES.. as a derived table.
  */
  TABLE_LIST *values_table{nullptr};
  Create_col_name_list *values_column_list{nullptr};

  /**
    Field list for VALUES derived table. If no insert_field exists (e.g. INSERT
    INTO t0 ..), we have to create one to create Item_insert_values for ODKU
    statements.
  */
  mem_root_deque<Item *> values_field_list;

  const enum_duplicates duplicates;

  explicit Sql_cmd_insert_base(bool is_replace_arg,
                               enum_duplicates duplicates_arg)
      : is_replace(is_replace_arg),
        insert_field_list(*THR_MALLOC),
        insert_many_values(*THR_MALLOC),
        column_count(0),
        value_count(0),
        update_field_list(*THR_MALLOC),
        update_value_list(*THR_MALLOC),
        values_field_list(*THR_MALLOC),
        duplicates(duplicates_arg) {}

  bool accept(THD *thd, Select_lex_visitor *visitor) override;
};

/**
  Class that implements INSERT ... VALUES and REPLACE ... VALUES statements.
*/

class Sql_cmd_insert_values : public Sql_cmd_insert_base {
 public:
  explicit Sql_cmd_insert_values(bool is_replace_arg,
                                 enum_duplicates duplicates_arg)
      : Sql_cmd_insert_base(is_replace_arg, duplicates_arg) {}

  enum_sql_command sql_command_code() const override {
    return is_replace ? SQLCOM_REPLACE : SQLCOM_INSERT;
  }

  bool is_single_table_plan() const override { return true; }

 protected:
  bool execute_inner(THD *thd) override;
};

/**
  Class that implements INSERT ... SELECT and REPLACE ... SELECT statements.
*/

class Sql_cmd_insert_select : public Sql_cmd_insert_base {
 public:
  explicit Sql_cmd_insert_select(bool is_replace_arg,
                                 enum_duplicates duplicates_arg)
      : Sql_cmd_insert_base(is_replace_arg, duplicates_arg) {}

  enum_sql_command sql_command_code() const override {
    return is_replace ? SQLCOM_REPLACE_SELECT : SQLCOM_INSERT_SELECT;
  }
  const MYSQL_LEX_CSTRING *eligible_secondary_storage_engine() const override;
};

You can see that writing this properly cost the authors real effort. If you know SQL, a read through the header makes the purpose of each class fairly obvious; if not, it is worth going back to review basic database operations first and then returning to the code. Honestly, that is not a joke.
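
As a quick orientation, here is a rough mapping from statement form to class (my own summary, pieced together from the header above):

INSERT INTO t (...) VALUES (...)   -> Sql_cmd_insert_values  (SQLCOM_INSERT)
REPLACE INTO t (...) VALUES (...)  -> Sql_cmd_insert_values  (SQLCOM_REPLACE)
INSERT INTO t SELECT ...           -> Sql_cmd_insert_select  (SQLCOM_INSERT_SELECT)
REPLACE INTO t SELECT ...          -> Sql_cmd_insert_select  (SQLCOM_REPLACE_SELECT)
INSERT ... SELECT result sink      -> Query_result_insert
CREATE TABLE ... AS SELECT ...     -> Query_result_create
Sql_cmd_insert_base                -> shared abstract base of the two Sql_cmd classes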
Now into the code, starting the analysis:

bool Sql_cmd_insert_values::execute_inner(THD *thd) {
  DBUG_TRACE;

  // Distinguish whether this is INSERT or REPLACE
  assert(thd->lex->sql_command == SQLCOM_REPLACE ||
         thd->lex->sql_command == SQLCOM_INSERT);

  /*
    We have three alternative syntax rules for the INSERT statement:
    1) "INSERT (columns) VALUES ...", so non-listed columns need a default
    2) "INSERT VALUES (), ..." so all columns need a default;
    note that "VALUES (),(expr_1, ..., expr_n)" is not allowed, so checking
    emptiness of the first row is enough
    3) "INSERT VALUES (expr_1, ...), ..." so no defaults are needed; even if
    expr_i is "DEFAULT" (in which case the column is set by
    Item_default_value::save_in_field_inner()).
  */
  // Three syntax forms to handle
  const bool manage_defaults = column_count > 0 ||  // 1)
                               value_count == 0;    // 2)
  // COPY_INFO objects for duplicate-key handling
  COPY_INFO info(COPY_INFO::INSERT_OPERATION, &insert_field_list,
                 manage_defaults, duplicates);
  COPY_INFO update(COPY_INFO::UPDATE_OPERATION, &update_field_list,
                   &update_value_list);

  Query_block *const query_block = lex->query_block;

  // Get the target table references
  TABLE_LIST *const table_list = lex->insert_table;
  TABLE *const insert_table = lex->insert_table_leaf->table;

  // Prepare for positional update when handling duplicates
  if (duplicates == DUP_UPDATE || duplicates == DUP_REPLACE)
    prepare_for_positional_update(insert_table, table_list);

  /* Must be done before can_prune_insert, due to internal initialization. */
  if (info.add_function_default_columns(insert_table, insert_table->write_set))
    return true; /* purecov: inspected */
  if (duplicates == DUP_UPDATE && update.add_function_default_columns(
                                      insert_table, insert_table->write_set))
    return true; /* purecov: inspected */

  // Current error state inside and after the insert loop
  bool has_error = false;

  {  // Statement plan is available within these braces
    Modification_plan plan(
        thd, (lex->sql_command == SQLCOM_INSERT) ? MT_INSERT : MT_REPLACE,
        insert_table, nullptr, false, 0);
    DEBUG_SYNC(thd, "planned_single_insert");

    // EXPLAIN handling
    if (lex->is_explain()) {
      bool err =
          explain_single_table_modification(thd, thd, &plan, query_block);
      return err;
    }

    insert_table->next_number_field = insert_table->found_next_number_field;

    // Replica-side handling
    if (thd->slave_thread) {
      /* Get SQL thread's rli, even for a slave worker thread */
      Relay_log_info *c_rli = thd->rli_slave->get_c_rli();
      assert(c_rli != nullptr);
      if (info.get_duplicate_handling() == DUP_UPDATE &&
          insert_table->next_number_field != nullptr &&
          rpl_master_has_bug(c_rli, 24432, true, nullptr, nullptr))
        return true;
    }

    // Trigger-related handling
    THD_STAGE_INFO(thd, stage_update);
    if (duplicates == DUP_REPLACE &&
        (!insert_table->triggers ||
         !insert_table->triggers->has_delete_triggers()))
      insert_table->file->ha_extra(HA_EXTRA_WRITE_CAN_REPLACE);
    if (duplicates == DUP_UPDATE)
      insert_table->file->ha_extra(HA_EXTRA_INSERT_WITH_UPDATE);
    /*
      let's *try* to start bulk inserts. It won't necessary
      start them as insert_many_values.elements should be greater than
      some - handler dependent - threshold.
      We should not start bulk inserts if this statement uses
      functions or invokes triggers since they may access
      to the same table and therefore should not see its
      inconsistent state created by this optimization.
      So we call start_bulk_insert to perform nesessary checks on
      insert_many_values.elements, and - if nothing else - to initialize
      the code to make the call of end_bulk_insert() below safe.
    */
    if (duplicates != DUP_ERROR || lex->is_ignore())
      insert_table->file->ha_extra(HA_EXTRA_IGNORE_DUP_KEY);
    /*
       This is a simple check for the case when the table has a trigger
       that reads from it, or when the statement invokes a stored function
       that reads from the table being inserted to.
       Engines can't handle a bulk insert in parallel with a read form the
       same table in the same connection.
    */
    if (thd->locked_tables_mode <= LTM_LOCK_TABLES)
      insert_table->file->ha_start_bulk_insert(insert_many_values.size());

    prepare_triggers_for_insert_stmt(thd, insert_table);

    /*
      Count warnings for all inserts. For single row insert, generate an error
      if trying to set a NOT NULL field to NULL.
      Notice that policy must be reset before leaving this function.
    */
    thd->check_for_truncated_fields =
        ((insert_many_values.size() == 1 && !lex->is_ignore())
             ? CHECK_FIELD_ERROR_FOR_NULL
             : CHECK_FIELD_WARN);
    thd->num_truncated_fields = 0L;

    for (Field **next_field = insert_table->field; *next_field; ++next_field) {
      (*next_field)->reset_warnings();
    }

    for (const List_item *values : insert_many_values) {
      Autoinc_field_has_explicit_non_null_value_reset_guard after_each_row(
          insert_table);

      restore_record(insert_table, s->default_values);  // Get empty record
      /*
        Check whether default values of the insert_field_list not specified in
        column list are correct or not.
      */
      // Validate the inserted field values
      if (validate_default_values_of_unset_fields(thd, insert_table)) {
        has_error = true;
        break;
      }
      if (fill_record_n_invoke_before_triggers(
              thd, &info, insert_field_list, *values, insert_table,
              TRG_EVENT_INSERT, insert_table->s->fields, true, nullptr)) {
        assert(thd->is_error());
        /*
          TODO: Convert warnings to errors if values_list.elements == 1
          and check that all items return warning in case of problem with
          storing field.
        */
        has_error = true;
        break;
      }

      if (check_that_all_fields_are_given_values(thd, insert_table,
                                                 table_list)) {
        assert(thd->is_error());
        has_error = true;
        break;
      }

      const int check_result = table_list->view_check_option(thd);
      if (check_result == VIEW_CHECK_SKIP)
        continue;
      else if (check_result == VIEW_CHECK_ERROR) {
        has_error = true;
        break;
      }

      if (invoke_table_check_constraints(thd, insert_table)) {
        if (thd->is_error()) {
          has_error = true;
          break;
        }
        // continue when IGNORE clause is used.
        continue;
      }

      // After all the checks above, we finally reach the write function
      if (write_record(thd, insert_table, &info, &update)) {
        has_error = true;
        break;
      }
      thd->get_stmt_da()->inc_current_row_for_condition();
    }
  }  // Statement plan is available within these braces

  assert(has_error == thd->get_stmt_da()->is_error());

  // After the writes: update logs and send the response
  /*
    Now all rows are inserted.  Time to update logs and sends response to
    user
  */
  {
    /* TODO: Only call this if insert_table->found_next_number_field.*/
    insert_table->file->ha_release_auto_increment();
    /*
      Make sure 'end_bulk_insert()' is called regardless of current error
    */
    int loc_error = 0;
    if (thd->locked_tables_mode <= LTM_LOCK_TABLES)
      loc_error = insert_table->file->ha_end_bulk_insert();
    /*
      Report error if 'end_bulk_insert()' failed, and set 'has_error'
    */
    if (loc_error && !has_error) {
      /* purecov: begin inspected */
      myf error_flags = MYF(0);
      if (insert_table->file->is_fatal_error(loc_error))
        error_flags |= ME_FATALERROR;

      insert_table->file->print_error(loc_error, error_flags);
      has_error = true;
      /* purecov: end */
    }

    // Transactional handling of the writes
    const bool transactional_table = insert_table->file->has_transactions();

    const bool changed MY_ATTRIBUTE((unused)) =
        info.stats.copied || info.stats.deleted || info.stats.updated;

    if (!has_error ||
        thd->get_transaction()->cannot_safely_rollback(Transaction_ctx::STMT)) {
      if (mysql_bin_log.is_open()) {
        int errcode = 0;
        if (!has_error) {
          /*
            [Guilhem wrote] Temporary errors may have filled
            thd->net.last_error/errno.  For example if there has
            been a disk full error when writing the row, and it was
            MyISAM, then thd->net.last_error/errno will be set to
            "disk full"... and the mysql_file_pwrite() will wait until free
            space appears, and so when it finishes then the
            write_row() was entirely successful
          */
          /* todo: consider removing */
          thd->clear_error();
        } else
          errcode = query_error_code(thd, thd->killed == THD::NOT_KILLED);

        /* bug#22725:

        A query which per-row-loop can not be interrupted with
        KILLED, like INSERT, and that does not invoke stored
        routines can be binlogged with neglecting the KILLED error.

        If there was no error (has_error == false) until after the end of
        inserting loop the KILLED flag that appeared later can be
        disregarded since previously possible invocation of stored
        routines did not result in any error due to the KILLED.  In
        such case the flag is ignored for constructing binlog event.
        */
        if (thd->binlog_query(THD::ROW_QUERY_TYPE, thd->query().str,
                              thd->query().length, transactional_table, false,
                              false, errcode))
          has_error = true;
      }
    }
    assert(
        transactional_table || !changed ||
        thd->get_transaction()->cannot_safely_rollback(Transaction_ctx::STMT));
  }
  /*
    We'll report to the client this id:
    - if the table contains an autoincrement column and we successfully
    inserted an autogenerated value, the autogenerated value.
    - if the table contains no autoincrement column and LAST_INSERT_ID(X) was
    called, X.
    - if the table contains an autoincrement column, and some rows were
    inserted, the id of the last "inserted" row (if IGNORE, that value may not
    have been really inserted but ignored).
  */
  ulonglong id =
      (thd->first_successful_insert_id_in_cur_stmt > 0)
          ? thd->first_successful_insert_id_in_cur_stmt
          : (thd->arg_of_last_insert_id_function
                 ? thd->first_successful_insert_id_in_prev_stmt
                 : ((insert_table->next_number_field && info.stats.copied)
                        ? insert_table->next_number_field->val_int()
                        : 0));
  insert_table->next_number_field = nullptr;

  // Remember to restore warning handling before leaving
  thd->check_for_truncated_fields = CHECK_FIELD_IGNORE;

  assert(has_error == thd->get_stmt_da()->is_error());
  if (has_error) return true;

  if (insert_many_values.size() == 1 &&
      (!(thd->variables.option_bits & OPTION_WARNINGS) ||
       !thd->num_truncated_fields)) {
    my_ok(thd,
          info.stats.copied + info.stats.deleted +
              (thd->get_protocol()->has_client_capability(CLIENT_FOUND_ROWS)
                   ? info.stats.touched
                   : info.stats.updated),
          id);
  } else {
    char buff[160];
    ha_rows updated =
        thd->get_protocol()->has_client_capability(CLIENT_FOUND_ROWS)
            ? info.stats.touched
            : info.stats.updated;
    if (lex->is_ignore())
      snprintf(buff, sizeof(buff), ER_THD(thd, ER_INSERT_INFO),
               (long)info.stats.records,
               (long)(info.stats.records - info.stats.copied),
               (long)thd->get_stmt_da()->current_statement_cond_count());
    else
      snprintf(buff, sizeof(buff), ER_THD(thd, ER_INSERT_INFO),
               (long)info.stats.records, (long)(info.stats.deleted + updated),
               (long)thd->get_stmt_da()->current_statement_cond_count());
    my_ok(thd, info.stats.copied + info.stats.deleted + updated, id, buff);
  }

  /*
    If we have inserted into a VIEW, and the base table has
    AUTO_INCREMENT column, but this column is not accessible through
    a view, then we should restore LAST_INSERT_ID to the value it
    had before the statement.
  */
  if (table_list->is_view() && !table_list->contain_auto_increment)
    thd->first_successful_insert_id_in_cur_stmt =
        thd->first_successful_insert_id_in_prev_stmt;

  DBUG_EXECUTE_IF("after_mysql_insert", {
    const char act[] =
        "now "
        "wait_for signal.continue";
    assert(opt_debug_sync_timeout > 0);
    assert(!debug_sync_set_action(thd, STRING_WITH_LEN(act)));
  };);

  return false;
}

The comment on this function says it inserts one or more rows into a table. So there it is: the core of the statement path, and not something to skim past. The inline comments mark the interesting spots; for everything else the English comments in the source are reasonably clear. Next comes the record-writing function, write_record().
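
Before the full listing, here is a condensed skeleton of its control flow (my own condensation of the function below, not compilable on its own):

if (duplicate_handling == DUP_REPLACE || duplicate_handling == DUP_UPDATE) {
  while ((error = table->file->ha_write_row(table->record[0]))) {
    // A duplicate key was hit: locate the conflicting row, then either
    //  - DUP_UPDATE: ha_update_row() it (ON DUPLICATE KEY UPDATE), or
    //  - DUP_REPLACE: ha_update_row() when this is the last unique key and
    //    there are no FK references or DELETE triggers, otherwise
    //    ha_delete_row() the old row and retry the write.
  }
} else if ((error = table->file->ha_write_row(table->record[0]))) {
  // Plain INSERT: a single write; errors are reported, or downgraded
  // to warnings under IGNORE.
}

The full function: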

bool write_record(THD *thd, TABLE *table, COPY_INFO *info, COPY_INFO *update) {
  int error, trg_error = 0;
  char *key = nullptr;
  // Bitmaps used for column-set bookkeeping
  MY_BITMAP *save_read_set, *save_write_set;
  ulonglong prev_insert_id = table->file->next_insert_id;
  ulonglong insert_id_for_cur_row = 0;
  // Local memory allocator
  MEM_ROOT mem_root;
  DBUG_TRACE;

  /* Here we are using separate MEM_ROOT as this memory should be freed once we
     exit write_record() function. This is marked as not instumented as it is
     allocated for very short time in a very specific case.
  */
  init_sql_alloc(PSI_NOT_INSTRUMENTED, &mem_root, 256, 0);
  info->stats.records++;
  save_read_set = table->read_set;
  save_write_set = table->write_set;

  const enum_duplicates duplicate_handling = info->get_duplicate_handling();

  if (duplicate_handling == DUP_REPLACE || duplicate_handling == DUP_UPDATE) {
    assert(duplicate_handling != DUP_UPDATE || update != nullptr);
    // ha_write_row() enters the storage engine; loop while it reports conflicts
    while ((error = table->file->ha_write_row(table->record[0]))) {
      uint key_nr;
      /*
        If we do more than one iteration of this loop, from the second one the
        row will have an explicit value in the autoinc field, which was set at
        the first call of handler::update_auto_increment(). So we must save
        the autogenerated value to avoid thd->insert_id_for_cur_row to become
        0.
      */
      // Handle the auto-increment id
      if (table->file->insert_id_for_cur_row > 0)
        insert_id_for_cur_row = table->file->insert_id_for_cur_row;
      else
        table->file->insert_id_for_cur_row = insert_id_for_cur_row;
      bool is_duplicate_key_error;
      if (!table->file->is_ignorable_error(error)) goto err;
      is_duplicate_key_error =
          (error == HA_ERR_FOUND_DUPP_KEY || error == HA_ERR_FOUND_DUPP_UNIQUE);
      if (!is_duplicate_key_error) {
        /*
          We come here when we had an ignorable error which is not a duplicate
          key error. In this we ignore error if ignore flag is set, otherwise
          report error as usual. We will not do any duplicate key processing.
        */
        info->last_errno = error;
        table->file->print_error(error, MYF(0));
        /*
          If IGNORE option is used, handler errors will be downgraded
          to warnings and don't have to stop the iteration.
        */
        if (thd->is_error()) goto before_trg_err;
        goto ok_or_after_trg_err; /* Ignoring a not fatal error, return 0 */
      }
      if ((int)(key_nr = table->file->get_dup_key(error)) < 0) {
        error = HA_ERR_FOUND_DUPP_KEY; /* Database can't find key */
        goto err;
      }
      /*
        key index value is either valid in the range [0-MAX_KEY) or
        has value MAX_KEY as a marker for the case when no information
        about key can be found. In the last case we have to require
        that storage engine has the flag HA_DUPLICATE_POS turned on.
        If this invariant is false then assert will crash
        the server built in debug mode. For the server that was built
        without DEBUG we have additional check for the value of key_nr
        in the code below in order to report about error in any case.
      */
      // Validate the duplicate-key index number
      assert(key_nr != MAX_KEY ||
             (key_nr == MAX_KEY &&
              (table->file->ha_table_flags() & HA_DUPLICATE_POS)));

      DEBUG_SYNC(thd, "write_row_replace");

      /* Read all columns for the row we are going to replace */
      table->use_all_columns();
      /*
        Don't allow REPLACE to replace a row when a auto_increment column
        was used.  This ensures that we don't get a problem when the
        whole range of the key has been used.
      */
      if (duplicate_handling == DUP_REPLACE && table->next_number_field &&
          key_nr == table->s->next_number_index && (insert_id_for_cur_row > 0))
        goto err;
      if (table->file->ha_table_flags() & HA_DUPLICATE_POS) {
        if (table->file->ha_rnd_pos(table->record[1], table->file->dup_ref))
          goto err;
      }
      /*
        If the key index is equal to MAX_KEY it's treated as unknown key case
        and we shouldn't try to locate key info.
      */
      else if (key_nr < MAX_KEY) {
        if (!key) {
          if (!(key = (char *)my_safe_alloca(table->s->max_unique_length,
                                             MAX_KEY_LENGTH))) {
            error = ENOMEM;
            goto err;
          }
        }
        /*
          If we convert INSERT operation internally to an UPDATE.
          An INSERT operation may update table->vfield for BLOB fields,
          So here we recalculate data for generated columns.
        */
        if (table->vfield) {
          update_generated_write_fields(table->write_set, table);
        }

        key_copy((uchar *)key, table->record[0], table->key_info + key_nr, 0);
        if ((error = (table->file->ha_index_read_idx_map(
                 table->record[1], key_nr, (uchar *)key, HA_WHOLE_KEY,
                 HA_READ_KEY_EXACT))))
          goto err;
      } else {
        /*
          For the server built in non-debug mode returns error if
          handler::get_dup_key() returned MAX_KEY as the value of key index.
        */
        error = HA_ERR_FOUND_DUPP_KEY; /* Database can't find key */
        goto err;
      }
      if (duplicate_handling == DUP_UPDATE) {
        int res = 0;
        /*
          We don't check for other UNIQUE keys - the first row
          that matches, is updated. If update causes a conflict again,
          an error is returned
        */
        assert(table->insert_values != nullptr);
        /*
          The insert has failed, store the insert_id generated for
          this row to be re-used for the next insert.
        */
        if (insert_id_for_cur_row > 0) prev_insert_id = insert_id_for_cur_row;

        store_record(table, insert_values);
        /*
          Special check for BLOB/GEOMETRY field in statements with
          "ON DUPLICATE KEY UPDATE" clause.
          See mysql_prepare_blob_values() function for more details.
        */
        // Prepare BLOB values; on failure, bail out before the triggers run
        if (mysql_prepare_blob_values(thd, *update->get_changed_columns(),
                                      &mem_root))
          goto before_trg_err;
        restore_record(table, record[1]);
        assert(update->get_changed_columns()->size() ==
               update->update_values->size());
        /*
          Reset TABLE::autoinc_field_has_explicit_non_null_value so we can
          figure out if ON DUPLICATE KEY UPDATE clause specifies value for
          auto-increment field as a side-effect of fill_record(). There is
          no need to clean-up this flag afterwards as this is responsibility
          of the caller.
        */
        table->autoinc_field_has_explicit_non_null_value = false;
        bool is_row_changed = false;
        if (fill_record_n_invoke_before_triggers(
                thd, update, *update->get_changed_columns(),
                *update->update_values, table, TRG_EVENT_UPDATE, 0, true,
                &is_row_changed))
          goto before_trg_err;

        bool insert_id_consumed = false;
        if (  // UPDATE clause specifies a value for the auto increment field
            table->autoinc_field_has_explicit_non_null_value &&
            // An auto increment value has been generated for this row
            (insert_id_for_cur_row > 0)) {
          // After-update value:
          const ulonglong auto_incr_val = table->next_number_field->val_int();
          if (auto_incr_val == insert_id_for_cur_row) {
            // UPDATE wants to use the generated value
            insert_id_consumed = true;
          } else if (table->file->auto_inc_interval_for_cur_row.in_range(
                         auto_incr_val)) {
            /*
              UPDATE wants to use one auto generated value which we have already
              reserved for another (previous or following) row. That may cause
              a duplicate key error if we later try to insert the reserved
              value. Such conflicts on auto generated values would be strange
              behavior, so we return a clear error now.
            */
            my_error(ER_AUTO_INCREMENT_CONFLICT, MYF(0));
            goto before_trg_err;
          }
        }

        if (!insert_id_consumed)
          table->file->restore_auto_increment(prev_insert_id);

        info->stats.touched++;
        if (is_row_changed) {
          /*
            CHECK OPTION for VIEW ... ON DUPLICATE KEY UPDATE ...
            It is safe to not invoke CHECK OPTION for VIEW if records are
            same. In this case the row is coming from the view and thus
            should satisfy the CHECK OPTION.
          */
          {
            const TABLE_LIST *inserted_view =
                table->pos_in_table_list->belong_to_view;
            if (inserted_view != nullptr) {
              res = inserted_view->view_check_option(thd);
              if (res == VIEW_CHECK_SKIP) goto ok_or_after_trg_err;
              if (res == VIEW_CHECK_ERROR) goto before_trg_err;
            }
          }

          /*
            Existing rows in table should normally satisfy CHECK constraints. So
            it should be safe to check constraints only for rows that has really
            changed (i.e. after compare_records()).

            In future, once addition/enabling of CHECK constraints without their
            validation is supported, we might encounter old rows which do not
            satisfy CHECK constraints currently enabled. However, rejecting
            no-op updates to such invalid pre-existing rows won't make them
            valid and is probably going to be confusing for users. So it makes
            sense to stick to current behavior.
          */
          // Check the table's constraints
          if (invoke_table_check_constraints(thd, table)) {
            if (thd->is_error()) goto before_trg_err;
            // return false when IGNORE clause is used.
            goto ok_or_after_trg_err;
          }

          if ((error = table->file->ha_update_row(table->record[1],
                                                  table->record[0])) &&
              error != HA_ERR_RECORD_IS_THE_SAME) {
            info->last_errno = error;
            myf error_flags = MYF(0);
            if (table->file->is_fatal_error(error))
              error_flags |= ME_FATALERROR;
            table->file->print_error(error, error_flags);
            /*
              If IGNORE option is used, handler errors will be downgraded
              to warnings and don't  have to stop the iteration.
            */
            if (thd->is_error()) goto before_trg_err;
            goto ok_or_after_trg_err; /* Ignoring a not fatal error, return 0 */
          }

          if (error != HA_ERR_RECORD_IS_THE_SAME)
            info->stats.updated++;
          else
            error = 0;
          /*
            If ON DUP KEY UPDATE updates a row instead of inserting one, it's
            like a regular UPDATE statement: it should not affect the value of a
            next SELECT LAST_INSERT_ID() or mysql_insert_id().
            Except if LAST_INSERT_ID(#) was in the INSERT query, which is
            handled separately by THD::arg_of_last_insert_id_function.
          */
          insert_id_for_cur_row = table->file->insert_id_for_cur_row = 0;
          info->stats.copied++;
        }

        // Execute the 'AFTER, ON UPDATE' trigger
        trg_error = (table->triggers &&
                     table->triggers->process_triggers(thd, TRG_EVENT_UPDATE,
                                                       TRG_ACTION_AFTER, true));
        goto ok_or_after_trg_err;
      } else /* DUP_REPLACE */
      {
        TABLE_LIST *view = table->pos_in_table_list->belong_to_view;

        if (view && view->replace_filter) {
          const size_t record_length = table->s->reclength;

          void *record0_saved =
              my_malloc(PSI_NOT_INSTRUMENTED, record_length, MYF(MY_WME));

          if (!record0_saved) {
            error = ENOMEM;
            goto err;
          }

          // Save the record used for comparison.
          memcpy(record0_saved, table->record[0], record_length);

          // Preparing the record for comparison.
          memcpy(table->record[0], table->record[1], record_length);

          // Checking if the row being conflicted is visible by the view.
          bool found_row_in_view = view->replace_filter->val_int();

          // Restoring the record back.
          memcpy(table->record[0], record0_saved, record_length);

          my_free(record0_saved);

          if (!found_row_in_view) {
            my_error(ER_REPLACE_INACCESSIBLE_ROWS, MYF(0));
            goto err;
          }
        }

        /*
          The manual defines the REPLACE semantics that it is either
          an INSERT or DELETE(s) + INSERT; FOREIGN KEY checks in
          InnoDB do not function in the defined way if we allow MySQL
          to convert the latter operation internally to an UPDATE.
          We also should not perform this conversion if we have
          timestamp field with ON UPDATE which is different from DEFAULT.
          Another case when conversion should not be performed is when
          we have ON DELETE trigger on table so user may notice that
          we cheat here. Note that it is ok to do such conversion for
          tables which have ON UPDATE but have no ON DELETE triggers,
          we just should not expose this fact to users by invoking
          ON UPDATE triggers.
        */
        // Handle ON DELETE / ON UPDATE triggers
        if (last_uniq_key(table, key_nr) &&
            !table->s->is_referenced_by_foreign_key() &&
            (!table->triggers || !table->triggers->has_delete_triggers())) {
          if ((error = table->file->ha_update_row(table->record[1],
                                                  table->record[0])) &&
              error != HA_ERR_RECORD_IS_THE_SAME)
            goto err;
          if (error != HA_ERR_RECORD_IS_THE_SAME)
            info->stats.deleted++;
          else
            error = 0;
          thd->record_first_successful_insert_id_in_cur_stmt(
              table->file->insert_id_for_cur_row);
          /*
            Since we pretend that we have done insert we should call
            its after triggers.
          */
          goto after_trg_n_copied_inc;
        } else {
          // Handle transaction marking and triggers
          if (table->triggers &&
              table->triggers->process_triggers(thd, TRG_EVENT_DELETE,
                                                TRG_ACTION_BEFORE, true))
            goto before_trg_err;
          if ((error = table->file->ha_delete_row(table->record[1]))) goto err;
          info->stats.deleted++;
          if (!table->file->has_transactions())
            thd->get_transaction()->mark_modified_non_trans_table(
                Transaction_ctx::STMT);
          if (table->triggers &&
              table->triggers->process_triggers(thd, TRG_EVENT_DELETE,
                                                TRG_ACTION_AFTER, true)) {
            trg_error = 1;
            goto ok_or_after_trg_err;
          }
          /* Let us attempt do write_row() once more */
        }
      }
    }

    /*
        If more than one iteration of the above while loop is done, from the
       second one the row being inserted will have an explicit value in the
       autoinc field, which was set at the first call of
       handler::update_auto_increment(). This value is saved to avoid
       thd->insert_id_for_cur_row becoming 0. Use this saved autoinc value.
     */
    // Propagate the saved auto-increment id
    if (table->file->insert_id_for_cur_row == 0)
      table->file->insert_id_for_cur_row = insert_id_for_cur_row;

    thd->record_first_successful_insert_id_in_cur_stmt(
        table->file->insert_id_for_cur_row);
    /*
      Restore column maps if they where replaced during an duplicate key
      problem.
    */
    if (table->read_set != save_read_set || table->write_set != save_write_set)
      table->column_bitmaps_set(save_read_set, save_write_set);
  } else if ((error = table->file->ha_write_row(table->record[0]))) {
    // Plain INSERT path: the single engine write failed; report it or
    // downgrade it to a warning (IGNORE) below.
    DEBUG_SYNC(thd, "write_row_noreplace");
    info->last_errno = error;
    myf error_flags = MYF(0);
    if (table->file->is_fatal_error(error)) error_flags |= ME_FATALERROR;
    table->file->print_error(error, error_flags);
    /*
      If IGNORE option is used, handler errors will be downgraded
      to warnings and don't  have to stop the iteration.
    */
    if (thd->is_error()) goto before_trg_err;
    table->file->restore_auto_increment(prev_insert_id);
    goto ok_or_after_trg_err;
  }

after_trg_n_copied_inc:
  info->stats.copied++;
  thd->record_first_successful_insert_id_in_cur_stmt(
      table->file->insert_id_for_cur_row);
  trg_error =
      (table->triggers && table->triggers->process_triggers(
                              thd, TRG_EVENT_INSERT, TRG_ACTION_AFTER, true));

ok_or_after_trg_err:
  if (key) my_safe_afree(key, table->s->max_unique_length, MAX_KEY_LENGTH);
  if (!table->file->has_transactions())
    thd->get_transaction()->mark_modified_non_trans_table(
        Transaction_ctx::STMT);
  free_root(&mem_root, MYF(0));
  return trg_error;

err : {
  myf error_flags = MYF(0); /**< Flag for fatal errors */
  info->last_errno = error;
  if (table->file->is_fatal_error(error)) error_flags |= ME_FATALERROR;

  table->file->print_error(error, error_flags);
}

before_trg_err:
  table->file->restore_auto_increment(prev_insert_id);
  if (key) my_safe_afree(key, table->s->max_unique_length, MAX_KEY_LENGTH);
  table->column_bitmaps_set(save_read_set, save_write_set);
  free_root(&mem_root, MYF(0));
  return true;
}

Inside write_record() the engine is reached through ha_write_row() (sql/handler.cc):

int handler::ha_write_row(uchar *buf) {
  int error;
  Log_func *log_func = Write_rows_log_event::binlog_row_logging_function;
  assert(table_share->tmp_table != NO_TMP_TABLE || m_lock_type == F_WRLCK);

  DBUG_TRACE;
  DBUG_EXECUTE_IF("inject_error_ha_write_row", return HA_ERR_INTERNAL_ERROR;);
  DBUG_EXECUTE_IF("simulate_storage_engine_out_of_memory",
                  return HA_ERR_SE_OUT_OF_MEMORY;);
  mark_trx_read_write();

  DBUG_EXECUTE_IF(
      "handler_crashed_table_on_usage",
      my_error(HA_ERR_CRASHED, MYF(ME_ERRORLOG), table_share->table_name.str);
      set_my_errno(HA_ERR_CRASHED); return HA_ERR_CRASHED;);

  // Dispatch the write into the storage engine
  MYSQL_TABLE_IO_WAIT(PSI_TABLE_WRITE_ROW, MAX_KEY, error,
                      { error = write_row(buf); })

  if (unlikely(error)) return error;

  // Row-based binary logging
  if (unlikely((error = binlog_log_row(table, nullptr, buf, log_func))))
    return error; /* purecov: inspected */

  DEBUG_SYNC_C("ha_write_row_end");
  return 0;
}

The handler class is the base class that every storage engine implements, so a call made through a handler pointer lands in the concrete engine subclass; this is ordinary C++ virtual dispatch.
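
Here is a minimal, self-contained sketch of that idiom. The class names are hypothetical stand-ins, not the real server types (the real handler lives in sql/handler.h):

#include <cstdio>

// Base class: the SQL layer calls the non-virtual entry point, which
// forwards to the engine-specific virtual method.
class handler_like {
 public:
  virtual ~handler_like() = default;
  int ha_write_row(const unsigned char *buf) {
    // ... shared bookkeeping (stats, binlog hooks) would go here ...
    return write_row(buf);  // virtual call lands in the concrete engine
  }

 private:
  virtual int write_row(const unsigned char *buf) = 0;
};

// A stand-in for an engine subclass such as ha_innobase.
class innodb_like : public handler_like {
 private:
  int write_row(const unsigned char *buf) override {
    std::printf("engine-specific insert of row at %p\n",
                static_cast<const void *>(buf));
    return 0;
  }
};

int main() {
  unsigned char row[16] = {};
  innodb_like engine;
  handler_like *h = &engine;  // the SQL layer only sees the base class
  return h->ha_write_row(row);
}

With that pattern in mind, here is the real InnoDB override, ha_innobase::write_row, defined in storage/innobase/handler/ha_innodb.cc: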

int ha_innobase::write_row(uchar *record) /*!< in: a row in MySQL format */
{
  dberr_t error;
  int error_result = 0;
  bool auto_inc_used = false;

  DBUG_TRACE;

  /* Increase the write count of handler */
  ha_statistic_increment(&System_status_var::ha_write_count);

  if (m_prebuilt->table->is_intrinsic()) {
    return intrinsic_table_write_row(record);
  }

  trx_t *trx = thd_to_trx(m_user_thd);
  TrxInInnoDB trx_in_innodb(trx);

  if (!m_prebuilt->table->is_intrinsic() && trx_in_innodb.is_aborted()) {
    innobase_rollback(ht, m_user_thd, false);

    return convert_error_code_to_mysql(DB_FORCED_ABORT, 0, m_user_thd);
  }

  /* Validation checks before we commence write_row operation. */
  if (high_level_read_only) {
    ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
    return HA_ERR_TABLE_READONLY;
  } else if (m_prebuilt->trx != trx) {
    ib::error(ER_IB_MSG_558) << "The transaction object for the table handle is"
                                " at "
                             << static_cast<const void *>(m_prebuilt->trx)
                             << ", but for the current thread it is at "
                             << static_cast<const void *>(trx);

    fputs("InnoDB: Dump of 200 bytes around m_prebuilt: ", stderr);
    ut_print_buf(stderr, ((const byte *)m_prebuilt) - 100, 200);
    fputs("\nInnoDB: Dump of 200 bytes around ha_data: ", stderr);
    ut_print_buf(stderr, ((const byte *)trx) - 100, 200);
    putc('\n', stderr);
    ut_error;
  } else if (!trx_is_started(trx)) {
    ++trx->will_lock;
  }

  /* Handling of Auto-Increment Columns. */
  if (table->next_number_field && record == table->record[0]) {
    /* Reset the error code before calling
    innobase_get_auto_increment(). */
    m_prebuilt->autoinc_error = DB_SUCCESS;

    if ((error_result = update_auto_increment())) {
      /* We don't want to mask autoinc overflow errors. */

      /* Handle the case where the AUTOINC sub-system
      failed during initialization. */
      if (m_prebuilt->autoinc_error == DB_UNSUPPORTED) {
        error_result = ER_AUTOINC_READ_FAILED;
        /* Set the error message to report too. */
        my_error(ER_AUTOINC_READ_FAILED, MYF(0));
        goto func_exit;
      } else if (m_prebuilt->autoinc_error != DB_SUCCESS) {
        error = m_prebuilt->autoinc_error;
        goto report_error;
      }

      /* MySQL errors are passed straight back. */
      goto func_exit;
    }

    auto_inc_used = true;
  }

  /* Prepare INSERT graph that will be executed for actual INSERT
  (This is a one time operation) */
  if (m_prebuilt->mysql_template == nullptr ||
      m_prebuilt->template_type != ROW_MYSQL_WHOLE_ROW) {
    /* Build the template used in converting quickly between
    the two database formats */

    build_template(true);
  }

  error = innobase_srv_conc_enter_innodb(m_prebuilt);

  if (error != DB_SUCCESS) {
    goto report_error;
  }

  /* Execute insert graph that will result in actual insert. */
  error = row_insert_for_mysql((byte *)record, m_prebuilt);

  DEBUG_SYNC(m_user_thd, "ib_after_row_insert");

  /* Handling of errors related to auto-increment. */
  if (auto_inc_used) {
    ulonglong auto_inc;
    ulonglong col_max_value;

    /* Note the number of rows processed for this statement, used
    by get_auto_increment() to determine the number of AUTO-INC
    values to reserve. This is only useful for a mult-value INSERT
    and is a statement level counter. */
    if (trx->n_autoinc_rows > 0) {
      --trx->n_autoinc_rows;
    }

    /* We need the upper limit of the col type to check for
    whether we update the table autoinc counter or not. */
    col_max_value = table->next_number_field->get_max_int_value();

    /* Get the value that MySQL attempted to store in the table. */
    auto_inc = table->next_number_field->val_int();

    switch (error) {
      case DB_DUPLICATE_KEY:

        /* A REPLACE command and LOAD DATA INFILE REPLACE
        handle a duplicate key error themselves, but we
        must update the autoinc counter if we are performing
        those statements. */

        switch (thd_sql_command(m_user_thd)) {
          case SQLCOM_LOAD:
            if (!m_prebuilt->allow_duplicates()) {
              break;
            }

          case SQLCOM_REPLACE:
          case SQLCOM_INSERT_SELECT:
          case SQLCOM_REPLACE_SELECT:
            goto set_max_autoinc;

          default:
            break;
        }

        break;

      case DB_SUCCESS:
        /* If the actual value inserted is greater than
        the upper limit of the interval, then we try and
        update the table upper limit. Note: last_value
        will be 0 if get_auto_increment() was not called. */

        if (auto_inc >= m_prebuilt->autoinc_last_value) {
        set_max_autoinc:
          /* This should filter out the negative
          values set explicitly by the user. */
          if (auto_inc <= col_max_value) {
            ut_a(m_prebuilt->autoinc_increment > 0);

            ulonglong offset;
            ulonglong increment;
            dberr_t err;

            offset = m_prebuilt->autoinc_offset;
            increment = m_prebuilt->autoinc_increment;

            auto_inc = innobase_next_autoinc(auto_inc, 1, increment, offset,
                                             col_max_value);

            err = innobase_set_max_autoinc(auto_inc);

            if (err != DB_SUCCESS) {
              error = err;
            }
          }
        }
        break;
      default:
        break;
    }
  }

  innobase_srv_conc_exit_innodb(m_prebuilt);

report_error:
  /* Cleanup and exit. */
  if (error == DB_TABLESPACE_DELETED) {
    ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_ERROR, ER_TABLESPACE_DISCARDED,
                table->s->table_name.str);
  }

  error_result =
      convert_error_code_to_mysql(error, m_prebuilt->table->flags, m_user_thd);

  if (error_result == HA_FTS_INVALID_DOCID) {
    my_error(HA_FTS_INVALID_DOCID, MYF(0));
  }

func_exit:

  innobase_active_small();

  return error_result;
}
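
Summarizing the function just shown, it boils down to these phases (my own summary of the code above):

// Phases of ha_innobase::write_row():
// 1. intrinsic table?          -> intrinsic_table_write_row() and return
// 2. read-only / aborted trx?  -> bail out early
// 3. auto-increment column?    -> update_auto_increment()
// 4. build the row template    -> build_template(true) if needed
// 5. the actual insert         -> row_insert_for_mysql(record, m_prebuilt)
// 6. autoinc follow-up         -> possibly raise the table's autoinc counter
// 7. map the engine error      -> convert_error_code_to_mysql()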

The core of the function above is the call to row_insert_for_mysql, which lives in storage/innobase/row/row0mysql.cc:

dberr_t row_insert_for_mysql(const byte *mysql_rec, row_prebuilt_t *prebuilt) {
  /* For intrinsic tables there a lot of restrictions that can be
  relaxed including locking of table, transaction handling, etc.
  Use direct cursor interface for inserting to intrinsic tables. */
  if (prebuilt->table->is_intrinsic()) {
    return (row_insert_for_mysql_using_cursor(mysql_rec, prebuilt));
  } else {
    return (row_insert_for_mysql_using_ins_graph(mysql_rec, prebuilt));
  }
}

static dberr_t row_insert_for_mysql_using_cursor(const byte *mysql_rec,
                                                 row_prebuilt_t *prebuilt) {
  dberr_t err = DB_SUCCESS;
  ins_node_t *node = nullptr;
  que_thr_t *thr = nullptr;
  mtr_t mtr;

  /* Step-1: Get the reference of row to insert. */
  row_get_prebuilt_insert_row(prebuilt);
  node = prebuilt->ins_node;
  thr = que_fork_get_first_thr(prebuilt->ins_graph);

  /* Step-2: Convert row from MySQL row format to InnoDB row format. */
  row_mysql_to_innobase(node->row, prebuilt, mysql_rec);

  /* Step-3: Append row-id index is not unique. */
  dict_index_t *clust_index = node->table->first_index();

  if (!dict_index_is_unique(clust_index)) {
    dict_sys_write_row_id(node->row_id_buf,
                          dict_table_get_next_table_sess_row_id(node->table));
  }

  trx_write_trx_id(node->trx_id_buf,
                   dict_table_get_next_table_sess_trx_id(node->table));

  /* Step-4: Iterate over all the indexes and insert entries. */
  dict_index_t *inserted_upto = nullptr;
  node->entry = UT_LIST_GET_FIRST(node->entry_list);
  for (dict_index_t *index = UT_LIST_GET_FIRST(node->table->indexes);
       index != nullptr; index = UT_LIST_GET_NEXT(indexes, index),
                    node->entry = UT_LIST_GET_NEXT(tuple_list, node->entry)) {
    node->index = index;
    err = row_ins_index_entry_set_vals(node->index, node->entry, node->row);
    if (err != DB_SUCCESS) {
      break;
    }

    if (index->is_clustered()) {
      err = row_ins_clust_index_entry(node->index, node->entry, thr, false);
    } else {
      err = row_ins_sec_index_entry(node->index, node->entry, thr, false);
    }

    if (err == DB_SUCCESS) {
      inserted_upto = index;
    } else {
      break;
    }
  }

  /* Step-5: If error is encountered while inserting entries to any
  of the index then entries inserted to previous indexes are removed
  explicity. Automatic rollback is not in action as UNDO logs are
  turned-off. */
  if (err != DB_SUCCESS) {
    node->entry = UT_LIST_GET_FIRST(node->entry_list);

    mtr_start(&mtr);
    dict_disable_redo_if_temporary(node->table, &mtr);

    for (dict_index_t *index = UT_LIST_GET_FIRST(node->table->indexes);
         inserted_upto != nullptr; index = UT_LIST_GET_NEXT(indexes, index),
                      node->entry = UT_LIST_GET_NEXT(tuple_list, node->entry)) {
      row_explicit_rollback(index, node->entry, thr, &mtr);

      if (index == inserted_upto) {
        break;
      }
    }

    mtr_commit(&mtr);
  } else {
    /* Not protected by dict_table_stats_lock() for performance
    reasons, we would rather get garbage in stat_n_rows (which is
    just an estimate anyway) than protecting the following code
    , with a latch. */
    dict_table_n_rows_inc(node->table);

    if (node->table->is_system_table) {
      srv_stats.n_system_rows_inserted.inc();
    } else {
      srv_stats.n_rows_inserted.inc();
    }
  }

  thr_get_trx(thr)->error_state = DB_SUCCESS;
  return (err);
}

/** Does an insert for MySQL using INSERT graph. This function will run/execute
INSERT graph.
@param[in]	mysql_rec	row in the MySQL format
@param[in,out]	prebuilt	prebuilt struct in MySQL handle
@return error code or DB_SUCCESS */
static dberr_t row_insert_for_mysql_using_ins_graph(const byte *mysql_rec,
                                                    row_prebuilt_t *prebuilt) {
  trx_savept_t savept;
  que_thr_t *thr;
  dberr_t err;
  ibool was_lock_wait;
  trx_t *trx = prebuilt->trx;
  ins_node_t *node = prebuilt->ins_node;
  dict_table_t *table = prebuilt->table;
  /* FIX_ME: This blob heap is used to compensate an issue in server
  for virtual column blob handling */
  mem_heap_t *blob_heap = nullptr;

  ut_ad(trx);
  ut_a(prebuilt->magic_n == ROW_PREBUILT_ALLOCATED);
  ut_a(prebuilt->magic_n2 == ROW_PREBUILT_ALLOCATED);

  if (dict_table_is_discarded(prebuilt->table)) {
    ib::error(ER_IB_MSG_976)
        << "The table " << prebuilt->table->name
        << " doesn't have a corresponding tablespace, it was"
           " discarded.";

    return (DB_TABLESPACE_DELETED);

  } else if (prebuilt->table->ibd_file_missing) {
    ib::error(ER_IB_MSG_977)
        << ".ibd file is missing for table " << prebuilt->table->name;

    return (DB_TABLESPACE_NOT_FOUND);

  } else if (srv_force_recovery &&
             !(srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN &&
               dict_sys_t::is_dd_table_id(prebuilt->table->id))) {
    /* Allow to modify hardcoded DD tables in some scenario to
    make DDL work */

    ib::error(ER_IB_MSG_978) << MODIFICATIONS_NOT_ALLOWED_MSG_FORCE_RECOVERY;

    return (DB_READ_ONLY);
  }

  DBUG_EXECUTE_IF("mark_table_corrupted", {
    /* Mark the table corrupted for the clustered index */
    dict_index_t *index = table->first_index();
    ut_ad(index->is_clustered());
    dict_set_corrupted(index);
  });

  if (table->is_corrupted()) {
    ib::error(ER_IB_MSG_979) << "Table " << table->name << " is corrupt.";
    return (DB_TABLE_CORRUPT);
  }

  trx->op_info = "inserting";

  row_mysql_delay_if_needed();

  trx_start_if_not_started_xa(trx, true);

  row_get_prebuilt_insert_row(prebuilt);
  node = prebuilt->ins_node;

  row_mysql_convert_row_to_innobase(node->row, prebuilt, mysql_rec, &blob_heap);

  savept = trx_savept_take(trx);

  thr = que_fork_get_first_thr(prebuilt->ins_graph);

  if (prebuilt->sql_stat_start) {
    node->state = INS_NODE_SET_IX_LOCK;
    prebuilt->sql_stat_start = FALSE;
  } else {
    node->state = INS_NODE_ALLOC_ROW_ID;
  }

  que_thr_move_to_run_state_for_mysql(thr, trx);

run_again:
  thr->run_node = node;
  thr->prev_node = node;

  row_ins_step(thr);

  DEBUG_SYNC_C("ib_after_row_insert_step");

  err = trx->error_state;

  if (err != DB_SUCCESS) {
  error_exit:
    que_thr_stop_for_mysql(thr);

    /* FIXME: What's this ? */
    thr->lock_state = QUE_THR_LOCK_ROW;

    was_lock_wait = row_mysql_handle_errors(&err, trx, thr, &savept);

    thr->lock_state = QUE_THR_LOCK_NOLOCK;

    if (was_lock_wait) {
      ut_ad(node->state == INS_NODE_INSERT_ENTRIES ||
            node->state == INS_NODE_ALLOC_ROW_ID);
      goto run_again;
    }

    trx->op_info = "";

    if (blob_heap != nullptr) {
      mem_heap_free(blob_heap);
    }

    return (err);
  }

  if (dict_table_has_fts_index(table)) {
    doc_id_t doc_id;

    /* Extract the doc id from the hidden FTS column */
    doc_id = fts_get_doc_id_from_row(table, node->row);

    if (doc_id <= 0) {
      ib::error(ER_IB_MSG_980) << "FTS Doc ID must be large than 0";
      err = DB_FTS_INVALID_DOCID;
      trx->error_state = DB_FTS_INVALID_DOCID;
      goto error_exit;
    }

    if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
      doc_id_t next_doc_id = table->fts->cache->next_doc_id;

      if (doc_id < next_doc_id) {
        ib::error(ER_IB_MSG_981)
            << "FTS Doc ID must be large than " << next_doc_id - 1
            << " for table " << table->name;

        err = DB_FTS_INVALID_DOCID;
        trx->error_state = DB_FTS_INVALID_DOCID;
        goto error_exit;
      }

      /* Difference between Doc IDs are restricted within
      4 bytes integer. See fts_get_encoded_len(). Consecutive
      doc_ids difference should not exceed
      FTS_DOC_ID_MAX_STEP value. */

      if (doc_id - next_doc_id >= FTS_DOC_ID_MAX_STEP) {
        ib::error(ER_IB_MSG_982) << "Doc ID " << doc_id
                                 << " is too big. Its difference with"
                                    " largest used Doc ID "
                                 << next_doc_id - 1
                                 << " cannot"
                                    " exceed or equal to "
                                 << FTS_DOC_ID_MAX_STEP;
        err = DB_FTS_INVALID_DOCID;
        trx->error_state = DB_FTS_INVALID_DOCID;
        goto error_exit;
      }
    }

    if (table->skip_alter_undo) {
      if (trx->fts_trx == nullptr) {
        trx->fts_trx = fts_trx_create(trx);
      }

      fts_trx_table_t ftt;
      ftt.table = table;
      ftt.fts_trx = trx->fts_trx;

      fts_add_doc_from_tuple(&ftt, doc_id, node->row);

    } else {
      /* Pass NULL for the columns affected, since an INSERT
      affects all FTS indexes. */
      fts_trx_add_op(trx, table, doc_id, FTS_INSERT, nullptr);
    }
  }

  que_thr_stop_for_mysql_no_error(thr, trx);

  if (table->is_system_table) {
    srv_stats.n_system_rows_inserted.inc();
  } else {
    srv_stats.n_rows_inserted.inc();
  }

  /* Not protected by dict_table_stats_lock() for performance
  reasons, we would rather get garbage in stat_n_rows (which is
  just an estimate anyway) than protecting the following code
  with a latch. */
  dict_table_n_rows_inc(table);

  row_update_statistics_if_needed(table);
  trx->op_info = "";

  if (blob_heap != nullptr) {
    mem_heap_free(blob_heap);
  }

  return (err);
}

The code above splits the actual insert into two forms, a cursor path and an INSERT-graph path, each with plenty of restrictions and constraints covering table locking, transaction handling and so on. The cursor path inserts directly; the graph path adds an extra step through row_ins_step.
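
The heart of the graph path is its lock-wait retry loop; this is my condensation of the relevant fragment of row_insert_for_mysql_using_ins_graph() above (not compilable on its own):

run_again:
  thr->run_node = node;
  thr->prev_node = node;
  row_ins_step(thr);                  // run one step of the INSERT graph
  err = trx->error_state;
  if (err != DB_SUCCESS) {
    que_thr_stop_for_mysql(thr);
    // Waits for the row/table lock if that is what failed, and reports
    // whether the step should simply be retried.
    if (row_mysql_handle_errors(&err, trx, thr, &savept)) goto run_again;
    return err;                       // genuine error: give up
  }

row_ins_step itself: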

que_thr_t *row_ins_step(que_thr_t *thr) /*!< in: query thread */
{
  ins_node_t *node;
  que_node_t *parent;
  sel_node_t *sel_node;
  trx_t *trx;
  dberr_t err;

  ut_ad(thr);

  DEBUG_SYNC_C("innodb_row_ins_step_enter");

  trx = thr_get_trx(thr);

  trx_start_if_not_started_xa(trx, true);

  node = static_cast<ins_node_t *>(thr->run_node);

  ut_ad(que_node_get_type(node) == QUE_NODE_INSERT);
  ut_ad(!node->table->is_intrinsic());

  parent = que_node_get_parent(node);
  sel_node = node->select;

  if (thr->prev_node == parent) {
    node->state = INS_NODE_SET_IX_LOCK;
  }

  /* If this is the first time this node is executed (or when
  execution resumes after wait for the table IX lock), set an
  IX lock on the table and reset the possible select node. MySQL's
  partitioned table code may also call an insert within the same
  SQL statement AFTER it has used this table handle to do a search.
  This happens, for example, when a row update moves it to another
  partition. In that case, we have already set the IX lock on the
  table during the search operation, and there is no need to set
  it again here. But we must write trx->id to node->trx_id_buf. */

  memset(node->trx_id_buf, 0, DATA_TRX_ID_LEN);
  trx_write_trx_id(node->trx_id_buf, trx->id);

  if (node->state == INS_NODE_SET_IX_LOCK) {
    node->state = INS_NODE_ALLOC_ROW_ID;

    /* It may be that the current session has not yet started
    its transaction, or it has been committed: */

    if (trx->id == node->trx_id) {
      /* No need to do IX-locking */

      goto same_trx;
    }

    err = lock_table(0, node->table, LOCK_IX, thr);

    DBUG_EXECUTE_IF("ib_row_ins_ix_lock_wait", err = DB_LOCK_WAIT;);

    if (err != DB_SUCCESS) {
      goto error_handling;
    }

    node->trx_id = trx->id;
  same_trx:
    if (node->ins_type == INS_SEARCHED) {
      /* Reset the cursor */
      sel_node->state = SEL_NODE_OPEN;

      /* Fetch a row to insert */

      thr->run_node = sel_node;

      return (thr);
    }
  }

  if ((node->ins_type == INS_SEARCHED) && (sel_node->state != SEL_NODE_FETCH)) {
    ut_ad(sel_node->state == SEL_NODE_NO_MORE_ROWS);

    /* No more rows to insert */
    thr->run_node = parent;

    return (thr);
  }

  /* DO THE CHECKS OF THE CONSISTENCY CONSTRAINTS HERE */

  err = row_ins(node, thr);

error_handling:
  trx->error_state = err;

  if (err != DB_SUCCESS) {
    /* err == DB_LOCK_WAIT or SQL error detected */
    return (nullptr);
  }

  /* DO THE TRIGGER ACTIONS HERE */

  if (node->ins_type == INS_SEARCHED) {
    /* Fetch a row to insert */

    thr->run_node = sel_node;
  } else {
    thr->run_node = que_node_get_parent(node);
  }

  return (thr);
}
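
The node-state handling in row_ins_step is easiest to read as a small state machine. A condensed sketch follows (illustrative only; the enum values merely mirror the InnoDB constants):

// Sketch of the insert-node phases driven by row_ins_step / row_ins.
enum ins_node_state {
  INS_NODE_SET_IX_LOCK,    // the table IX lock must still be acquired
  INS_NODE_ALLOC_ROW_ID,   // lock held: allocate a row id, build the row
  INS_NODE_INSERT_ENTRIES  // row built: insert one entry per index
};

// After all index entries of a row are inserted, the state is reset to
// INS_NODE_ALLOC_ROW_ID so the node is ready for the next row.
inline ins_node_state next_phase(ins_node_state s) {
  switch (s) {
    case INS_NODE_SET_IX_LOCK:
      return INS_NODE_ALLOC_ROW_ID;
    case INS_NODE_ALLOC_ROW_ID:
      return INS_NODE_INSERT_ENTRIES;
    case INS_NODE_INSERT_ENTRIES:
    default:
      return INS_NODE_ALLOC_ROW_ID;
  }
}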

row_ins_step in turn calls the row_ins function:

static MY_ATTRIBUTE((warn_unused_result)) dberr_t
    row_ins(ins_node_t *node, /*!< in: row insert node */
            que_thr_t *thr)   /*!< in: query thread */
{
  dberr_t err;

  DBUG_TRACE;

  DBUG_PRINT("row_ins", ("table: %s", node->table->name.m_name));

  if (node->state == INS_NODE_ALLOC_ROW_ID) {
    row_ins_alloc_row_id_step(node);

    node->index = node->table->first_index();
    node->entry = UT_LIST_GET_FIRST(node->entry_list);

    if (node->ins_type == INS_SEARCHED) {
      row_ins_get_row_from_query_block(node);

    } else if (node->ins_type == INS_VALUES) {
      row_ins_get_row_from_values(node);
    }

    node->state = INS_NODE_INSERT_ENTRIES;
  }

  ut_ad(node->state == INS_NODE_INSERT_ENTRIES);

  while (node->index != nullptr) {
    if (node->index->type != DICT_FTS) {
      err = row_ins_index_entry_step(node, thr);

      switch (err) {
        case DB_SUCCESS:
          break;
        case DB_DUPLICATE_KEY:
          thr_get_trx(thr)->error_state = DB_DUPLICATE_KEY;
          thr_get_trx(thr)->error_index = node->index;
        // fall through
        default:
          return err;
      }
    }

    node->index = node->index->next();
    node->entry = UT_LIST_GET_NEXT(tuple_list, node->entry);

    DBUG_EXECUTE_IF("row_ins_skip_sec", node->index = nullptr;
                    node->entry = nullptr; break;);

    /* Skip corrupted secondary index and its entry */
    while (node->index && node->index->is_corrupted()) {
      node->index = node->index->next();
      node->entry = UT_LIST_GET_NEXT(tuple_list, node->entry);
    }
  }

  ut_ad(node->entry == nullptr);

  thr_get_trx(thr)->error_index = nullptr;
  node->state = INS_NODE_ALLOC_ROW_ID;

  return DB_SUCCESS;
}
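
The loop in row_ins walks node->index and node->entry in lockstep: every non-FTS index consumes exactly one entry from entry_list, and corrupted secondary indexes are skipped together with their entries. A hypothetical miniature of that pairing, assuming one prepared entry per index (a sketch, not server code):

#include <cstddef>
#include <vector>

// Miniature of row_ins's per-index loop: indexes and entries advance in
// lockstep; FTS indexes are skipped here (they are fed through
// fts_trx_add_op instead), as are corrupted secondary indexes.
struct mini_index { bool is_fts; bool corrupted; };
struct mini_entry { int id; };

bool insert_all_entries(const std::vector<mini_index> &indexes,
                        const std::vector<mini_entry> &entries) {
  for (std::size_t i = 0; i < indexes.size() && i < entries.size(); ++i) {
    if (indexes[i].is_fts || indexes[i].corrupted) {
      continue;  // entry i is skipped along with its index
    }
    (void)entries[i];  // stand-in for row_ins_index_entry_step(node, thr)
  }
  return true;
}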

Note that row_ins itself is declared through the MY_ATTRIBUTE macro (warn_unused_result, so callers must not ignore its result). It then calls row_ins_index_entry_step:

static MY_ATTRIBUTE((warn_unused_result)) dberr_t
    row_ins_index_entry_step(ins_node_t *node, /*!< in: row insert node */
                             que_thr_t *thr)   /*!< in: query thread */
{
  dberr_t err;

  DBUG_TRACE;

  ut_ad(dtuple_check_typed(node->row));

  err = row_ins_index_entry_set_vals(node->index, node->entry, node->row);

  if (err != DB_SUCCESS) {
    return err;
  }

  ut_ad(dtuple_check_typed(node->entry));

  err = row_ins_index_entry(node->index, node->entry, node->ins_multi_val_pos,
                            thr);

  DEBUG_SYNC_C_IF_THD(thr_get_trx(thr)->mysql_thd,
                      "after_row_ins_index_entry_step");

  return err;
}
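
row_ins_index_entry_step first fills in the index entry from the row via row_ins_index_entry_set_vals, then hands the entry to row_ins_index_entry, which dispatches on the index type:
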
static dberr_t row_ins_index_entry(dict_index_t *index, dtuple_t *entry,
                                   uint32_t &multi_val_pos, que_thr_t *thr) {
  ut_ad(thr_get_trx(thr)->id != 0);

  DBUG_EXECUTE_IF("row_ins_index_entry_timeout", {
    DBUG_SET("-d,row_ins_index_entry_timeout");
    return (DB_LOCK_WAIT);
  });

  if (index->is_clustered()) {
    return (row_ins_clust_index_entry(index, entry, thr, false));
  } else if (index->is_multi_value()) {
    return (
        row_ins_sec_index_multi_value_entry(index, entry, multi_val_pos, thr));
  } else {
    return (row_ins_sec_index_entry(index, entry, thr, false));
  }
}

That gives us the final three-way dispatch: depending on whether the index is the clustered index, a multi-value index, or a regular secondary index, one of the following three functions is invoked:

dberr_t row_ins_clust_index_entry(
    dict_index_t *index, /*!< in: clustered index */
    dtuple_t *entry,     /*!< in/out: index entry to insert */
    que_thr_t *thr,      /*!< in: query thread */
    bool dup_chk_only)
/*!< in: if true, just do duplicate check
and return. don't execute actual insert. */
{
  dberr_t err;
  ulint n_uniq;

  DBUG_TRACE;

  if (!index->table->foreign_set.empty()) {
    err = row_ins_check_foreign_constraints(index->table, index, entry, thr);
    if (err != DB_SUCCESS) {
      return err;
    }
  }

  n_uniq = dict_index_is_unique(index) ? index->n_uniq : 0;

  /* Try first optimistic descent to the B-tree */
  uint32_t flags;

  if (!index->table->is_intrinsic()) {
    log_free_check();
    flags = index->table->is_temporary() ? BTR_NO_LOCKING_FLAG : 0;

    /* For intermediate table of copy alter operation,
    skip undo logging and record lock checking for
    insertion operation. */
    if (index->table->skip_alter_undo) {
      flags |= BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG;
    }

  } else {
    flags = BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG;
  }

  if (index->table->is_intrinsic() && dict_index_is_auto_gen_clust(index)) {
    /* Check whether memory for the intrinsic cursor cache
    has been allocated */
    if (!index->last_ins_cur) {
      dict_allocate_mem_intrinsic_cache(index);
    }
    err = row_ins_sorted_clust_index_entry(BTR_MODIFY_LEAF, index, entry, thr);
  } else {
    err = row_ins_clust_index_entry_low(flags, BTR_MODIFY_LEAF, index, n_uniq,
                                        entry, thr, dup_chk_only);
  }

  DEBUG_SYNC_C_IF_THD(thr_get_trx(thr)->mysql_thd,
                      "after_row_ins_clust_index_entry_leaf");

  if (err != DB_FAIL) {
    DEBUG_SYNC_C("row_ins_clust_index_entry_leaf_after");
    return err;
  }

  /* Try then pessimistic descent to the B-tree */
  if (!index->table->is_intrinsic()) {
    log_free_check();
  } else if (!index->last_sel_cur) {
    dict_allocate_mem_intrinsic_cache(index);
    index->last_sel_cur->invalid = true;
  } else {
    index->last_sel_cur->invalid = true;
  }

  if (index->table->is_intrinsic() && dict_index_is_auto_gen_clust(index)) {
    err = row_ins_sorted_clust_index_entry(BTR_MODIFY_TREE, index, entry, thr);
  } else {
    err = row_ins_clust_index_entry_low(flags, BTR_MODIFY_TREE, index, n_uniq,
                                        entry, thr, dup_chk_only);
  }

  return err;
}

/** Inserts an entry into a secondary index. Tries first optimistic,
 then pessimistic descent down the tree. If the entry matches enough
 to a delete marked record, performs the insert by updating or delete
 unmarking the delete marked record.
 @return DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */
dberr_t row_ins_sec_index_entry(
    dict_index_t *index, /*!< in: secondary index */
    dtuple_t *entry,     /*!< in/out: index entry to insert */
    que_thr_t *thr,      /*!< in: query thread */
    bool dup_chk_only)
/*!< in: if true, just do duplicate check
and return. don't execute actual insert. */
{
  dberr_t err;
  mem_heap_t *offsets_heap;
  mem_heap_t *heap;
  trx_id_t trx_id = 0;

  DBUG_EXECUTE_IF("row_ins_sec_index_entry_timeout", {
    DBUG_SET("-d,row_ins_sec_index_entry_timeout");
    return (DB_LOCK_WAIT);
  });

  DBUG_EXECUTE_IF("row_ins_sec_index_entry_lock_wait", {
    static uint16_t count = 0;
    if (index->is_multi_value()) {
      ++count;
    }
    if (count == 2) {
      count = 0;
      return (DB_LOCK_WAIT);
    }
  });

  if (!index->table->foreign_set.empty()) {
    err = row_ins_check_foreign_constraints(index->table, index, entry, thr);
    if (err != DB_SUCCESS) {
      return (err);
    }
  }

  offsets_heap = mem_heap_create(1024);
  heap = mem_heap_create(1024);

  /* Try first optimistic descent to the B-tree */

  uint32_t flags;

  if (!index->table->is_intrinsic()) {
    log_free_check();
    ut_ad(thr_get_trx(thr)->id != 0);

    flags = index->table->is_temporary() ? BTR_NO_LOCKING_FLAG : 0;
    /* For intermediate table during copy alter table,
    skip the undo log and record lock checking for
    insertion operation. */
    if (index->table->skip_alter_undo) {
      trx_id = thr_get_trx(thr)->id;
      flags |= BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG;
    }

  } else {
    flags = BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG;
  }

  err = row_ins_sec_index_entry_low(flags, BTR_MODIFY_LEAF, index, offsets_heap,
                                    heap, entry, trx_id, thr, dup_chk_only);
  if (err == DB_FAIL) {
    mem_heap_empty(heap);

    /* Try then pessimistic descent to the B-tree */

    if (!index->table->is_intrinsic()) {
      log_free_check();
    } else if (!index->last_sel_cur) {
      dict_allocate_mem_intrinsic_cache(index);
      index->last_sel_cur->invalid = true;
    } else {
      index->last_sel_cur->invalid = true;
    }

    err =
        row_ins_sec_index_entry_low(flags, BTR_MODIFY_TREE, index, offsets_heap,
                                    heap, entry, 0, thr, dup_chk_only);
  }

  mem_heap_free(heap);
  mem_heap_free(offsets_heap);
  return (err);
}

/** Inserts an entry into a secondary index, which is created for
multi-value field. For each value to be inserted, it tries first optimistic,
then pessimistic descent down the tree. If the entry matches enough
to a delete marked record, performs the insert by updating or delete
unmarking the delete marked record.
@param[in]      index           secondary index which is for multi-value field
@param[in,out]  entry           index entry to insert
@param[in,out]  multi_val_pos   the start position to insert next multi-value
                                data, and the returned value should be either
                                0 if all are done, or the position where the
                                insert failed. So return value of 0 could be
                                a bit ambiguous, however the return error
                                can help to see which case it is
@param[in]      thr             query thread
@return DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */
static dberr_t row_ins_sec_index_multi_value_entry(dict_index_t *index,
                                                   dtuple_t *entry,
                                                   uint32_t &multi_val_pos,
                                                   que_thr_t *thr) {
  ut_d(trx_t *trx = thr_get_trx(thr));

  ut_ad(trx->id != 0);
  ut_ad(!index->table->is_intrinsic());
  ut_ad(index->is_committed());
  ut_ad(!dict_index_is_online_ddl(index));
  ut_ad(index->is_multi_value());

  dberr_t err = DB_SUCCESS;
  Multi_value_entry_builder_insert mv_entry_builder(index, entry);

  for (dtuple_t *mv_entry = mv_entry_builder.begin(multi_val_pos);
       mv_entry != nullptr; mv_entry = mv_entry_builder.next()) {
    err = row_ins_sec_index_entry(index, mv_entry, thr, false);
    if (err != DB_SUCCESS) {
      multi_val_pos = mv_entry_builder.last_multi_value_position();
      return (err);
    }
  }

  multi_val_pos = 0;

  return (err);
}
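
All three entry functions above share the same shape: first an optimistic descent with BTR_MODIFY_LEAF, where only the leaf page needs to be modified, and only if that returns DB_FAIL a pessimistic descent with BTR_MODIFY_TREE, which may split pages. A minimal sketch of the pattern, with hypothetical stand-in functions (not server code):

// Two-phase B-tree descent: optimistic first, pessimistic on DB_FAIL.
enum class dberr { SUCCESS, FAIL, LOCK_WAIT, DUPLICATE_KEY };

// Stand-in for the BTR_MODIFY_LEAF attempt: pretend the leaf is full.
static dberr optimistic_insert() { return dberr::FAIL; }
// Stand-in for the BTR_MODIFY_TREE attempt: the split succeeds.
static dberr pessimistic_insert() { return dberr::SUCCESS; }

dberr two_phase_insert() {
  dberr err = optimistic_insert();
  if (err != dberr::FAIL) {
    return err;  // success, or a real error such as a duplicate key
  }
  return pessimistic_insert();  // leaf had no room: allow page splits
}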

Let's pick the clustered-index path and walk through row_ins_clust_index_entry_low:

dberr_t row_ins_clust_index_entry_low(
    uint32_t flags,      /*!< in: undo logging and locking flags */
    ulint mode,          /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
                         depending on whether we wish optimistic or
                         pessimistic descent down the index tree */
    dict_index_t *index, /*!< in: clustered index */
    ulint n_uniq,        /*!< in: 0 or index->n_uniq */
    dtuple_t *entry,     /*!< in/out: index entry to insert */
    que_thr_t *thr,      /*!< in: query thread, or NULL if
                         flags & (BTR_NO_LOCKING_FLAG
                         | BTR_NO_UNDO_LOG_FLAG) and a duplicate
                         can't occur */
    bool dup_chk_only)
/*!< in: if true, just do duplicate check
and return. don't execute actual insert. */
{
  btr_pcur_t pcur;
  btr_cur_t *cursor;
  dberr_t err = DB_SUCCESS;
  big_rec_t *big_rec = nullptr;
  mtr_t mtr;
  mem_heap_t *offsets_heap = nullptr;
  ulint offsets_[REC_OFFS_NORMAL_SIZE];
  ulint *offsets = offsets_;
  rec_offs_init(offsets_);

  DBUG_TRACE;

#ifdef UNIV_DEBUG
  mtr_t temp_mtr;
  temp_mtr.start();
  mtr_s_lock(dict_index_get_lock(index), &temp_mtr);

  if (btr_height_get(index, &temp_mtr) >= BTR_MAX_NODE_LEVEL &&
      btr_cur_limit_optimistic_insert_debug > 1 &&
      btr_cur_limit_optimistic_insert_debug < 5) {
    ib::error(ER_IB_MSG_BTREE_LEVEL_LIMIT_EXCEEDED, index->name());
    temp_mtr.commit();
    return (DB_BTREE_LEVEL_LIMIT_EXCEEDED);
  }

  temp_mtr.commit();
#endif

  ut_ad(index->is_clustered());
  ut_ad(!dict_index_is_unique(index) ||
        n_uniq == dict_index_get_n_unique(index));
  ut_ad(!n_uniq || n_uniq == dict_index_get_n_unique(index));
  ut_ad((flags & (BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG)) ||
        !thr_get_trx(thr)->in_rollback);
  ut_ad(thr != nullptr || !dup_chk_only);

  mtr.start();

  if (index->table->is_temporary()) {
    /* Disable REDO logging as the lifetime of temp-tables is
    limited to server or connection lifetime and so REDO
    information is not needed on restart for recovery.
    Disable locking as temp-tables are local to a connection. */

    ut_ad(flags & BTR_NO_LOCKING_FLAG);
    ut_ad(!index->table->is_intrinsic() || (flags & BTR_NO_UNDO_LOG_FLAG));

    mtr.set_log_mode(MTR_LOG_NO_REDO);
  }

  if (mode == BTR_MODIFY_LEAF && dict_index_is_online_ddl(index)) {
    mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED;
    mtr_s_lock(dict_index_get_lock(index), &mtr);
  }

  /* Note that we use PAGE_CUR_LE as the search mode, because then
  the function will return in both low_match and up_match of the
  cursor sensible values */
  btr_pcur_open(index, entry, PAGE_CUR_LE, mode, &pcur, &mtr);
  cursor = btr_pcur_get_btr_cur(&pcur);
  cursor->thr = thr;

  ut_ad(!index->table->is_intrinsic() ||
        cursor->page_cur.block->made_dirty_with_no_latch);

#ifdef UNIV_DEBUG
  {
    page_t *page = btr_cur_get_page(cursor);
    rec_t *first_rec = page_rec_get_next(page_get_infimum_rec(page));

    ut_ad(page_rec_is_supremum(first_rec) ||
          rec_n_fields_is_sane(index, first_rec, entry));
  }
#endif /* UNIV_DEBUG */

  /* Write logs for AUTOINC right after index lock has been got and
  before any further resource acquisitions to prevent deadlock.
  No need to log for temporary tables and intermediate tables */
  if (!index->table->is_temporary() && !index->table->skip_alter_undo &&
      dict_table_has_autoinc_col(index->table)) {
    ib_uint64_t counter =
        row_get_autoinc_counter(entry, index->table->autoinc_field_no);

    if (counter != 0) {
      /* Always log the counter change first, so it won't
      be affected by any follow-up failure. */
      dict_table_autoinc_log(index->table, counter, &mtr);
    }
  }

  /* Allowing duplicates in clustered index is currently enabled
  only for intrinsic table and caller understand the limited
  operation that can be done in this case. */
  ut_ad(!index->allow_duplicates ||
        (index->allow_duplicates && index->table->is_intrinsic()));

  if (!index->allow_duplicates && n_uniq &&
      (cursor->up_match >= n_uniq || cursor->low_match >= n_uniq)) {
    if (flags == (BTR_CREATE_FLAG | BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG |
                  BTR_KEEP_SYS_FLAG)) {
      /* Set no locks when applying log
      in online table rebuild. Only check for duplicates. */
      err = row_ins_duplicate_error_in_clust_online(n_uniq, entry, cursor,
                                                    &offsets, &offsets_heap);

      switch (err) {
        case DB_SUCCESS:
          break;
        default:
          ut_ad(0);
          /* fall through */
        case DB_SUCCESS_LOCKED_REC:
        case DB_DUPLICATE_KEY:
          if (thr != nullptr) {
            thr_get_trx(thr)->error_index = cursor->index;
          }
      }
    } else {
      /* Note that the following may return also
      DB_LOCK_WAIT */

      err = row_ins_duplicate_error_in_clust(flags, cursor, entry, thr, &mtr);
    }

    if (err != DB_SUCCESS) {
    err_exit:
      mtr.commit();
      goto func_exit;
    }
  }

  if (dup_chk_only) {
    mtr.commit();
    goto func_exit;
  }
  /* Note: Allowing duplicates would qualify for modification of
  an existing record as the new entry is exactly same as old entry.
  Avoid this check if allow duplicates is enabled. */
  if (!index->allow_duplicates && row_ins_must_modify_rec(cursor)) {
    /* There is already an index entry with a long enough common
    prefix, we must convert the insert into a modify of an
    existing record */
    mem_heap_t *entry_heap = mem_heap_create(1024);

    /* If the existing record is being modified and the new record
    doesn't fit the provided slot then existing record is added
    to free list and new record is inserted. This also means
    cursor that we have cached for SELECT is now invalid. */
    if (index->last_sel_cur) {
      index->last_sel_cur->invalid = true;
    }

    ut_ad(thr != nullptr);
    err = row_ins_clust_index_entry_by_modify(&pcur, flags, mode, &offsets,
                                              &offsets_heap, entry_heap, entry,
                                              thr, &mtr);

    if (err == DB_SUCCESS && dict_index_is_online_ddl(index)) {
      row_log_table_insert(btr_cur_get_rec(cursor), entry, index, offsets);
    }

    mtr.commit();
    mem_heap_free(entry_heap);
  } else {
    rec_t *insert_rec;

    if (mode != BTR_MODIFY_TREE) {
      ut_ad((mode & ~BTR_ALREADY_S_LATCHED) == BTR_MODIFY_LEAF);
      err = btr_cur_optimistic_insert(flags, cursor, &offsets, &offsets_heap,
                                      entry, &insert_rec, &big_rec, thr, &mtr);
    } else {
      if (buf_LRU_buf_pool_running_out()) {
        err = DB_LOCK_TABLE_FULL;
        goto err_exit;
      }

      DEBUG_SYNC_C("before_insert_pessimitic_row_ins_clust");

      err = btr_cur_optimistic_insert(flags, cursor, &offsets, &offsets_heap,
                                      entry, &insert_rec, &big_rec, thr, &mtr);

      if (err == DB_FAIL) {
        err =
            btr_cur_pessimistic_insert(flags, cursor, &offsets, &offsets_heap,
                                       entry, &insert_rec, &big_rec, thr, &mtr);

        if (index->table->is_intrinsic() && err == DB_SUCCESS) {
          row_ins_temp_prebuilt_tree_modified(index->table);
        }
      }
    }

    if (big_rec != nullptr) {
      mtr.commit();

      /* Online table rebuild could read (and
      ignore) the incomplete record at this point.
      If online rebuild is in progress, the
      row_ins_index_entry_big_rec() will write log. */

      DBUG_EXECUTE_IF("row_ins_extern_checkpoint",
                      log_make_latest_checkpoint(););
      err = row_ins_index_entry_big_rec(thr_get_trx(thr), entry, big_rec,
                                        offsets, &offsets_heap, index,
                                        thr_get_trx(thr)->mysql_thd);
      dtuple_convert_back_big_rec(entry, big_rec);
    } else {
      if (err == DB_SUCCESS && dict_index_is_online_ddl(index)) {
        row_log_table_insert(insert_rec, entry, index, offsets);
      }

      mtr.commit();
    }
  }

func_exit:
  if (offsets_heap != nullptr) {
    mem_heap_free(offsets_heap);
  }

  btr_pcur_close(&pcur);

  DBUG_EXECUTE_IF(
      "ib_sdi", if (dict_table_is_sdi(index->table->id)) {
        ib::info(ER_IB_MSG_959)
            << "ib_sdi: row_ins_clust_index_entry_low: " << index->name << " "
            << index->table->name << " return status: " << err;
      });

  return err;
}
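
The duplicate handling above delegates to row_ins_duplicate_error_in_clust, shown next. Because the cursor was positioned with PAGE_CUR_LE, a possible duplicate can sit either at the cursor position (tracked by low_match) or immediately after it (tracked by up_match), so the function checks both directions:
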
static MY_ATTRIBUTE((warn_unused_result)) dberr_t
    row_ins_duplicate_error_in_clust(
        ulint flags,           /*!< in: undo logging and locking flags */
        btr_cur_t *cursor,     /*!< in: B-tree cursor */
        const dtuple_t *entry, /*!< in: entry to insert */
        que_thr_t *thr,        /*!< in: query thread */
        mtr_t *mtr)            /*!< in: mtr */
{
  dberr_t err;
  rec_t *rec;
  ulint n_unique;
  trx_t *trx = thr_get_trx(thr);
  mem_heap_t *heap = nullptr;
  ulint offsets_[REC_OFFS_NORMAL_SIZE];
  ulint *offsets = offsets_;
  rec_offs_init(offsets_);

  UT_NOT_USED(mtr);

  ut_ad(cursor->index->is_clustered());

  /* NOTE: For unique non-clustered indexes there may be any number
  of delete marked records with the same value for the non-clustered
  index key (remember multiversioning), and which differ only in
  the row reference part of the index record, containing the
  clustered index key fields. For such a secondary index record,
  to avoid race condition, we must FIRST do the insertion and after
  that check that the uniqueness condition is not breached! */

  /* NOTE: A problem is that in the B-tree node pointers on an
  upper level may match more to the entry than the actual existing
  user records on the leaf level. So, even if low_match would suggest
  that a duplicate key violation may occur, this may not be the case. */

  n_unique = dict_index_get_n_unique(cursor->index);

  if (cursor->low_match >= n_unique) {
    rec = btr_cur_get_rec(cursor);

    if (!page_rec_is_infimum(rec)) {
      offsets =
          rec_get_offsets(rec, cursor->index, offsets, ULINT_UNDEFINED, &heap);

      /* We set a lock on the possible duplicate: this
      is needed in logical logging of MySQL to make
      sure that in roll-forward we get the same duplicate
      errors as in original execution */

      if (flags & BTR_NO_LOCKING_FLAG) {
        /* Do nothing if no-locking is set */
        err = DB_SUCCESS;
      } else {
        /* If the SQL-query will update or replace
        duplicate key we will take X-lock for
        duplicates ( REPLACE, LOAD DATAFILE REPLACE,
        INSERT ON DUPLICATE KEY UPDATE). */

        err = row_ins_set_rec_lock(row_allow_duplicates(thr) ? LOCK_X : LOCK_S,
                                   LOCK_REC_NOT_GAP, btr_cur_get_block(cursor),
                                   rec, cursor->index, offsets, thr);
      }

      switch (err) {
        case DB_SUCCESS_LOCKED_REC:
        case DB_SUCCESS:
          break;
        default:
          goto func_exit;
      }

      if (row_ins_dupl_error_with_rec(rec, entry, cursor->index, offsets)) {
      duplicate:
        trx->error_index = cursor->index;
        err = DB_DUPLICATE_KEY;
        goto func_exit;
      }
    }
  }

  if (cursor->up_match >= n_unique) {
    rec = page_rec_get_next(btr_cur_get_rec(cursor));

    if (!page_rec_is_supremum(rec)) {
      offsets =
          rec_get_offsets(rec, cursor->index, offsets, ULINT_UNDEFINED, &heap);

      /* If the SQL-query will update or replace
      duplicate key we will take X-lock for
      duplicates ( REPLACE, LOAD DATAFILE REPLACE,
      INSERT ON DUPLICATE KEY UPDATE). */

      err = row_ins_set_rec_lock(row_allow_duplicates(thr) ? LOCK_X : LOCK_S,
                                 LOCK_REC_NOT_GAP, btr_cur_get_block(cursor),
                                 rec, cursor->index, offsets, thr);

      switch (err) {
        case DB_SUCCESS_LOCKED_REC:
        case DB_SUCCESS:
          break;
        default:
          goto func_exit;
      }

      if (row_ins_dupl_error_with_rec(rec, entry, cursor->index, offsets)) {
        goto duplicate;
      }
    }

    /* This should never happen */
    ut_error;
  }

  err = DB_SUCCESS;
func_exit:
  if (UNIV_LIKELY_NULL(heap)) {
    mem_heap_free(heap);
  }
  return (err);
}
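
Note the lock mode taken on a candidate duplicate: statements that may go on to modify the existing row (REPLACE, LOAD DATA ... REPLACE, INSERT ... ON DUPLICATE KEY UPDATE) take an X lock, while a plain INSERT takes only an S lock. A trivial sketch of that decision (illustrative only):

// Mirrors the row_allow_duplicates(thr) ? LOCK_X : LOCK_S choice above.
enum class rec_lock_mode { S, X };

// allow_duplicates is true for REPLACE, LOAD DATA ... REPLACE and
// INSERT ... ON DUPLICATE KEY UPDATE, which may update the found row.
inline rec_lock_mode duplicate_check_lock(bool allow_duplicates) {
  return allow_duplicates ? rec_lock_mode::X : rec_lock_mode::S;
}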

What remains is logging and related bookkeeping; at this point a record has been completely inserted into the table. Of course there are still many details beneath this flow, and they will be analyzed as we reach them rather than all at once.
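
To recap, the engine-layer call chain traced in this article is: row_ins_step → row_ins → row_ins_index_entry_step → row_ins_index_entry → row_ins_clust_index_entry / row_ins_sec_index_entry / row_ins_sec_index_multi_value_entry → row_ins_clust_index_entry_low (or row_ins_sec_index_entry_low) → btr_cur_optimistic_insert / btr_cur_pessimistic_insert.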

III. Summary

Written over the Dragon Boat Festival holiday. Keep at it, and make the time count!