预测健康成本源代码,欢迎拿走学习

//患者吸烟,区域,年龄,bmi和孩子数,使用回归算法做出预测,该机器学习模型会根据性别预测医疗保健费用。

{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "predicting_health_costs_with_regression.ipynb",
      "private_outputs": true,
      "provenance": [],
      "collapsed_sections": [],
      "toc_visible": true,
      "include_colab_link": true
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/emilyliublair/Machine-Learning-Projects/blob/main/predicting_health_costs_with_regression.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "1rRo8oNqZ-Rj"
      },
      "source": [
        "# Import libraries. You may or may not use all of these.\n",
        "!pip install -q git+https://github.com/tensorflow/docs\n",
        "import matplotlib.pyplot as plt\n",
        "import numpy as np\n",
        "import pandas as pd\n",
        "\n",
        "try:\n",
        "  # %tensorflow_version only exists in Colab.\n",
        "  %tensorflow_version 2.x\n",
        "except Exception:\n",
        "  pass\n",
        "import tensorflow as tf\n",
        "\n",
        "from tensorflow import keras\n",
        "from tensorflow.keras import layers\n",
        "\n",
        "import tensorflow_docs as tfdocs\n",
        "import tensorflow_docs.plots\n",
        "import tensorflow_docs.modeling"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "CiX2FI4gZtTt"
      },
      "source": [
        "# Import data\n",
        "!wget https://cdn.freecodecamp.org/project-data/health-costs/insurance.csv\n",
        "dataset = pd.read_csv('insurance.csv')\n",
        "dataset.tail()"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "vSdWjpo5NYYJ"
      },
      "source": [
        "#split dataset\n",
        "train_dataset = dataset[0:int(.8*len(dataset))]\n",
        "test_dataset = dataset[int(.8*len(dataset)):]\n",
        "\n",
        "train_labels = train_dataset.pop('expenses')\n",
        "test_labels = test_dataset.pop('expenses')\n",
        "\n",
        "print(train_dataset.head())"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Xm7ktqGkK764"
      },
      "source": [
        "train_dataset.age.hist(bins=20)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "xaQPaz2ELfKu"
      },
      "source": [
        "train_dataset.sex.value_counts().plot(kind='barh')"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "5ZumKGdVLSyl"
      },
      "source": [
        "pd.concat([train_dataset, train_labels], axis=1).groupby('sex').expenses.mean().plot(kind='barh')"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "YNC5fYssH7EP"
      },
      "source": [
        "#input function\n",
        "def input_fn(features, labels, training=True, batch_size=256):\n",
        "  dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))\n",
        "  if training:\n",
        "    dataset=dataset.shuffle(1000).repeat()\n",
        "  print(dataset.batch(batch_size))\n",
        "  return (dataset.batch(batch_size))"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "7kjF9gTDLhjx"
      },
      "source": [
        "#preprocess data\n",
        "categorical = ['sex', 'smoker', 'region']\n",
        "numeric = ['age', 'bmi', 'children']\n",
        "\n",
        "feature_columns = []\n",
        "for feature_name in categorical:\n",
        "  vocab = train_dataset[feature_name].unique()\n",
        "  feature_columns.append(tf.feature_column.categorical_column_with_vocabulary_list(feature_name, vocab))\n",
        "\n",
        "for feature_name in numeric:\n",
        "  feature_columns.append(tf.feature_column.numeric_column(feature_name, dtype=tf.float32))\n",
        "\n",
        "print(feature_columns)\n",
        "\n",
        "train_dataset = train_dataset.replace({'no':0, 'yes':1,'male':0,'female':1,'southwest':0,'southeast':1,'northwest':2,'northeast':3})\n",
        "test_dataset = test_dataset.replace({'no':0, 'yes':1,'male':0,'female':1,'southwest':0,'southeast':1,'northwest':2,'northeast':3})\n",
        "print(train_dataset)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "WMXs-aReR5v2"
      },
      "source": [
        "normalizer = tf.keras.layers.experimental.preprocessing.Normalization()\n",
        "normalizer.adapt(np.array(train_dataset))"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "2JdEAuBuSTo_"
      },
      "source": [
        "#create model\n",
        "model = tf.keras.Sequential([\n",
        "    normalizer,\n",
        "    layers.Dense(128, activation='relu'),\n",
        "    layers.Dense(64, activation=\"relu\"),\n",
        "    layers.Dense(1, activation=\"relu\")\n",
        "])\n",
        "\n",
        "model.summary()"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "KROKonv2TVRY"
      },
      "source": [
        "#train model\n",
        "model.compile(optimizer=tf.keras.optimizers.Adam(.001), loss='mean_absolute_error', metrics=['mae','mse'])\n",
        "\n",
        "history=model.fit(\n",
        "    train_dataset, train_labels, epochs=500\n",
        ")"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "zRFyCzfqTHj9"
      },
      "source": [
        "#prediciton\n",
        "model.predict(train_dataset[:10])"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "rBA4i7SwWs2W"
      },
      "source": [
        "#evaluate model\n",
        "test_results = model.evaluate(test_dataset, test_labels)\n",
        "print(test_results)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Xe7RXH3N3CWU"
      },
      "source": [
        "# RUN THIS CELL TO TEST YOUR MODEL. DO NOT MODIFY CONTENTS.\n",
        "# Test model by checking how well the model generalizes using the test set.\n",
        "loss, mae, mse = model.evaluate(test_dataset, test_labels, verbose=2)\n",
        "\n",
        "print(\"Testing set Mean Abs Error: {:5.2f} expenses\".format(mae))\n",
        "\n",
        "if mae < 3500:\n",
        "  print(\"You passed the challenge. Great job!\")\n",
        "else:\n",
        "  print(\"The Mean Abs Error must be less than 3500. Keep trying.\")\n",
        "\n",
        "# Plot predictions.\n",
        "test_predictions = model.predict(test_dataset).flatten()\n",
        "\n",
        "a = plt.axes(aspect='equal')\n",
        "plt.scatter(test_labels, test_predictions)\n",
        "plt.xlabel('True values (expenses)')\n",
        "plt.ylabel('Predictions (expenses)')\n",
        "lims = [0, 50000]\n",
        "plt.xlim(lims)\n",
        "plt.ylim(lims)\n",
        "_ = plt.plot(lims,lims)\n"
      ],
      "execution_count": null,
      "outputs": []
    }
  ]
}

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

打工人何苦为难打工人

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值