{
"cells": [
{
"cell_type": "markdown",
"id": "470561fa",
"metadata": {},
"source": [
"Install the latest OpenAI package..."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "b36942c1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: openai in e:\\anaconda3\\lib\\site-packages (0.27.9)\n",
"Requirement already satisfied: tqdm in e:\\anaconda3\\lib\\site-packages (from openai) (4.64.1)\n",
"Requirement already satisfied: requests>=2.20 in e:\\anaconda3\\lib\\site-packages (from openai) (2.28.1)\n",
"Requirement already satisfied: aiohttp in e:\\anaconda3\\lib\\site-packages (from openai) (3.8.3)\n",
"Requirement already satisfied: idna<4,>=2.5 in e:\\anaconda3\\lib\\site-packages (from requests>=2.20->openai) (3.4)\n",
"Requirement already satisfied: certifi>=2017.4.17 in e:\\anaconda3\\lib\\site-packages (from requests>=2.20->openai) (2022.12.7)\n",
"Requirement already satisfied: urllib3<1.27,>=1.21.1 in e:\\anaconda3\\lib\\site-packages (from requests>=2.20->openai) (1.26.13)\n",
"Requirement already satisfied: charset-normalizer<3,>=2 in e:\\anaconda3\\lib\\site-packages (from requests>=2.20->openai) (2.1.1)\n",
"Requirement already satisfied: frozenlist>=1.1.1 in e:\\anaconda3\\lib\\site-packages (from aiohttp->openai) (1.3.3)\n",
"Requirement already satisfied: aiosignal>=1.1.2 in e:\\anaconda3\\lib\\site-packages (from aiohttp->openai) (1.3.1)\n",
"Requirement already satisfied: multidict<7.0,>=4.5 in e:\\anaconda3\\lib\\site-packages (from aiohttp->openai) (6.0.2)\n",
"Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in e:\\anaconda3\\lib\\site-packages (from aiohttp->openai) (4.0.2)\n",
"Requirement already satisfied: attrs>=17.3.0 in e:\\anaconda3\\lib\\site-packages (from aiohttp->openai) (22.1.0)\n",
"Requirement already satisfied: yarl<2.0,>=1.0 in e:\\anaconda3\\lib\\site-packages (from aiohttp->openai) (1.8.1)\n",
"Requirement already satisfied: colorama in e:\\anaconda3\\lib\\site-packages (from tqdm->openai) (0.4.6)\n"
]
}
],
"source": [
"!pip install openai --upgrade"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "00df62fb",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from openai import OpenAI\n",
"\n",
"# Read the API key from the OPENAI_API_KEY environment variable rather than hardcoding it in the notebook.\n",
"client = OpenAI(api_key=os.environ.get(\"OPENAI_API_KEY\"))"
]
},
{
"cell_type": "markdown",
"id": "276cb367",
"metadata": {},
"source": [
"Upload our training and evaluation files, in chat completion format:"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "638aa772",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<File file id=file-9lI2ovFA1UJskgOPpxDTwEhG at 0x2266872ea90> JSON: {\n",
" \"object\": \"file\",\n",
" \"id\": \"file-9lI2ovFA1UJskgOPpxDTwEhG\",\n",
" \"purpose\": \"fine-tune\",\n",
" \"filename\": \"file\",\n",
" \"bytes\": 1774941,\n",
" \"created_at\": 1692794707,\n",
" \"status\": \"uploaded\",\n",
" \"status_details\": null\n",
"}"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"client.files.create(\n",
" file=open(\"./DATA_train.jsonl\", \"rb\"),\n",
" purpose='fine-tune'\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "2d3da0ad",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<File file id=file-UqPVnkk9z8Q74BEUqPlnhjHL at 0x226669e59f0> JSON: {\n",
" \"object\": \"file\",\n",
" \"id\": \"file-UqPVnkk9z8Q74BEUqPlnhjHL\",\n",
" \"purpose\": \"fine-tune\",\n",
" \"filename\": \"file\",\n",
" \"bytes\": 442619,\n",
" \"created_at\": 1692794711,\n",
" \"status\": \"uploaded\",\n",
" \"status_details\": null\n",
"}"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"client.files.create(\n",
" file=open(\"./DATA_eval.jsonl\", \"rb\"),\n",
" purpose='fine-tune'\n",
")"
]
},
{
"cell_type": "markdown",
"id": "b45dc0e1",
"metadata": {},
"source": [
"Check the status of these files by copying in the IDs returned above. If there are JSON errors, they will be reported here."
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "ffc4be66",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<File file id=file-UqPVnkk9z8Q74BEUqPlnhjHL at 0x2266865c180> JSON: {\n",
" \"object\": \"file\",\n",
" \"id\": \"file-UqPVnkk9z8Q74BEUqPlnhjHL\",\n",
" \"purpose\": \"fine-tune\",\n",
" \"filename\": \"file\",\n",
" \"bytes\": 442619,\n",
" \"created_at\": 1692794711,\n",
" \"status\": \"uploaded\",\n",
" \"status_details\": null\n",
"}"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"client.files.retrieve(\"file-UqPVnkk9z8Q74BEUqPlnhjHL\")"
]
},
{
"cell_type": "markdown",
"id": "0558d96b",
"metadata": {},
"source": [
"Start our fine-tuning job! Copy in the IDs of our uploaded training and validation files."
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "7f83856c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<FineTuningJob fine_tuning.job id=ftjob-mQlhbPB5vsog1SeDLNx2xAMj at 0x226669e58b0> JSON: {\n",
" \"object\": \"fine_tuning.job\",\n",
" \"id\": \"ftjob-mQlhbPB5vsog1SeDLNx2xAMj\",\n",
" \"model\": \"gpt-3.5-turbo-0613\",\n",
" \"created_at\": 1692794886,\n",
" \"finished_at\": null,\n",
" \"fine_tuned_model\": null,\n",
" \"organization_id\": \"org-DBeDgDH8c36NSJobwuaBPXrW\",\n",
" \"result_files\": [],\n",
" \"status\": \"created\",\n",
" \"validation_file\": \"file-UqPVnkk9z8Q74BEUqPlnhjHL\",\n",
" \"training_file\": \"file-9lI2ovFA1UJskgOPpxDTwEhG\",\n",
" \"hyperparameters\": {\n",
" \"n_epochs\": 3\n",
" },\n",
" \"trained_tokens\": null\n",
"}"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"client.fine_tuning.jobs.create(training_file=\"file-9lI2ovFA1UJskgOPpxDTwEhG\", validation_file=\"file-UqPVnkk9z8Q74BEUqPlnhjHL\", model=\"gpt-3.5-turbo\")"
]
},
{
"cell_type": "markdown",
"id": "52ab0e27",
"metadata": {},
"source": [
"Get general info about this job."
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "e6621b29",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<FineTuningJob fine_tuning.job id=ftjob-mQlhbPB5vsog1SeDLNx2xAMj at 0x22666a16220> JSON: {\n",
" \"object\": \"fine_tuning.job\",\n",
" \"id\": \"ftjob-mQlhbPB5vsog1SeDLNx2xAMj\",\n",
" \"model\": \"gpt-3.5-turbo-0613\",\n",
" \"created_at\": 1692794886,\n",
" \"finished_at\": null,\n",
" \"fine_tuned_model\": null,\n",
" \"organization_id\": \"org-DBeDgDH8c36NSJobwuaBPXrW\",\n",
" \"result_files\": [],\n",
" \"status\": \"running\",\n",
" \"validation_file\": \"file-UqPVnkk9z8Q74BEUqPlnhjHL\",\n",
" \"training_file\": \"file-9lI2ovFA1UJskgOPpxDTwEhG\",\n",
" \"hyperparameters\": {\n",
" \"n_epochs\": 3\n",
" },\n",
" \"trained_tokens\": null\n",
"}"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"client.fine_tuning.jobs.retrieve(\"ftjob-mQlhbPB5vsog1SeDLNx2xAMj\")"
]
},
{
"cell_type": "markdown",
"id": "f3c4e264",
"metadata": {},
"source": [
"Monitor its progress. When it is done, you can use the resulting fine-tuned model ID in the playground (or the API)."
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "a38f20e2",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<OpenAIObject list at 0x22668759950> JSON: {\n",
" \"object\": \"list\",\n",
" \"data\": [\n",
" {\n",
" \"object\": \"fine_tuning.job.event\",\n",
" \"id\": \"ftevent-n0GA9lmPtAulghPIgsfSSdM1\",\n",
" \"created_at\": 1692797270,\n",
" \"level\": \"info\",\n",
" \"message\": \"Fine-tuning job successfully completed\",\n",
" \"data\": null,\n",
" \"type\": \"message\"\n",
" },\n",
" {\n",
" \"object\": \"fine_tuning.job.event\",\n",
" \"id\": \"ftevent-miblzvSktANikUk7sJOQe6Ir\",\n",
" \"created_at\": 1692797267,\n",
" \"level\": \"info\",\n",
" \"message\": \"New fine-tuned model created: ft:gpt-3.5-turbo-0613:sundog-software-llc::7qiBf2gI\",\n",
" \"data\": null,\n",
" \"type\": \"message\"\n",
" },\n",
" {\n",
" \"object\": \"fine_tuning.job.event\",\n",
" \"id\": \"ftevent-VVGJVV3Ss6N6ROQjK4xhib6T\",\n",
" \"created_at\": 1692796566,\n",
" \"level\": \"info\",\n",
" \"message\": \"Step 1000: training loss=1.89\",\n",
" \"data\": {\n",
" \"step\": 1000,\n",
" \"train_loss\": 1.8927490711212158,\n",
" \"train_mean_token_accuracy\": 0.5443037748336792\n",
" },\n",
" \"type\": \"metrics\"\n",
" },\n",
" {\n",
" \"object\": \"fine_tuning.job.event\",\n",
" \"id\": \"ftevent-QiOj8MeAHinAFG4DD3JIeEA1\",\n",
" \"created_at\": 1692794887,\n",
" \"level\": \"info\",\n",
" \"message\": \"Fine tuning job started\",\n",
" \"data\": null,\n",
" \"type\": \"message\"\n",
" },\n",
" {\n",
" \"object\": \"fine_tuning.job.event\",\n",
" \"id\": \"ftevent-ZS3NuX0GHccf7llea6HHPU8e\",\n",
" \"created_at\": 1692794886,\n",
" \"level\": \"info\",\n",
" \"message\": \"Created fine-tune: ftjob-mQlhbPB5vsog1SeDLNx2xAMj\",\n",
" \"data\": null,\n",
" \"type\": \"message\"\n",
" }\n",
" ],\n",
" \"has_more\": false\n",
"}"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"client.fine_tuning.jobs.list_events(id=\"ftjob-mQlhbPB5vsog1SeDLNx2xAMj\", limit=10)"
]
},
{
"cell_type": "markdown",
"id": "45e8edcb",
"metadata": {},
"source": [
"For comparison, see how the non-fine-tuned GPT model does:"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "feaaf07d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"role\": \"assistant\",\n",
" \"content\": \"DATA: Aye, Captain. Scanning for lifeforms. However, as an android, I do not possess the biological sensors typically used for such scans. Instead, I rely on my advanced sensory systems and tricorder readings to detect signs of life.\\n\\n(Pauses briefly)\\n\\nDATA: I am detecting several lifeforms in the vicinity, Captain. Shall I provide detailed analysis?\"\n",
"}\n"
]
}
],
"source": [
"completion = client.chat.completions.create(\n",
" model=\"gpt-3.5-turbo\",\n",
" messages=[\n",
" {\"role\": \"system\", \"content\": \"Data is an android in the TV series Star Trek: The Next Generation.\"},\n",
" {\"role\": \"user\", \"content\": \"PICARD: Mr. Data, scan for lifeforms.\"}\n",
" ]\n",
")\n",
"\n",
"print(completion.choices[0].message)\n"
]
},
{
"cell_type": "markdown",
"id": "7ac0f853",
"metadata": {},
"source": [
"When it's done, try our fine-tuned model! Copy in our fine-tuned model ID."
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "1249edde",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"role\": \"assistant\",\n",
" \"content\": \"DATA: I am finding very low levels of radioactivity -- I cannot make a conclusive scan for lifeforms, Captain.\"\n",
"}\n"
]
}
],
"source": [
"completion = client.chat.completions.create(\n",
" model=\"ft:gpt-3.5-turbo-0613:sundog-software-llc::7qiBf2gI\",\n",
" messages=[\n",
" {\"role\": \"system\", \"content\": \"Data is an android in the TV series Star Trek: The Next Generation.\"},\n",
" {\"role\": \"user\", \"content\": \"PICARD: Mr. Data, scan for lifeforms.\"}\n",
" ]\n",
")\n",
"\n",
"print(completion.choices[0].message)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "82066210",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.18"
}
},
"nbformat": 4,
"nbformat_minor": 5
}