Using AWS Bedrock To Query A MySQL Database And Store Results In CSV
Reference: https://medium.com/@mahmood1/using-aws-bedrock-to-query-a-mysql-database-5c33c083324f
Step 1: Follow https://medium.com/@mahmood1/using-aws-bedrock-to-query-a-mysql-database-5c33c083324f to create the RDS instance and the Bedrock-backed Lambda function.
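Before moving on, it is worth confirming that your credentials can reach Bedrock and that the ai21.j2-ultra-v1 model used later is enabled in your account. A minimal sketch, assuming locally configured credentials and the us-east-1 region (both are assumptions, adjust to your setup):

import boto3

# Sanity check: list the foundation models visible to this account and confirm
# the AI21 Jurassic-2 Ultra model used by the Lambda code below is present.
bedrock = boto3.client('bedrock', region_name='us-east-1')  # region is an assumption
models = bedrock.list_foundation_models()['modelSummaries']
print('ai21.j2-ultra-v1 available:',
      any(m['modelId'] == 'ai21.j2-ultra-v1' for m in models))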
Step 2: Connect to the MySQL instance and execute the following commands:
CREATE DATABASE SchoolDB;
USE SchoolDB;
CREATE TABLE Students (
StudentID INT AUTO_INCREMENT PRIMARY KEY,
FirstName VARCHAR(50),
LastName VARCHAR(50),
BirthDate DATE
);
CREATE TABLE Courses (
CourseID INT AUTO_INCREMENT PRIMARY KEY,
CourseName VARCHAR(100),
Credits INT
);
CREATE TABLE Professors (
ProfessorID INT AUTO_INCREMENT PRIMARY KEY,
FirstName VARCHAR(50),
LastName VARCHAR(50),
Department VARCHAR(50)
);
CREATE TABLE Enrollments (
EnrollmentID INT AUTO_INCREMENT PRIMARY KEY,
StudentID INT,
CourseID INT,
ProfessorID INT,
EnrollmentDate DATE,
FOREIGN KEY (StudentID) REFERENCES Students(StudentID),
FOREIGN KEY (CourseID) REFERENCES Courses(CourseID),
FOREIGN KEY (ProfessorID) REFERENCES Professors(ProfessorID)
);
INSERT INTO Students (FirstName, LastName, BirthDate) VALUES
('John', 'Doe', '2000-01-15'),
('Jane', 'Smith', '1999-03-22'),
('Sam', 'Brown', '2001-06-30');
INSERT INTO Courses (CourseName, Credits) VALUES
('Mathematics', 3),
('Biology', 4),
('Literature', 2);
INSERT INTO Professors (FirstName, LastName, Department) VALUES
('Alice', 'Johnson', 'Mathematics'),
('Robert', 'Wilson', 'Biology'),
('Emily', 'Davis', 'Literature');
INSERT INTO Enrollments (StudentID, CourseID, ProfessorID, EnrollmentDate) VALUES
(1, 1, 1, '2024-01-10'),
(2, 2, 2, '2024-01-15'),
(3, 3, 3, '2024-01-20'),
(1, 2, 2, '2024-01-25'),
(2, 3, 3, '2024-02-01');
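With the sample data in place, you can sanity-check it by running the kind of join Bedrock will later be asked to generate. A minimal sketch using pymysql (the placeholders are the same ones replaced in Step 3, and the query itself is only an illustration, not output from the model):

import pymysql

connection = pymysql.connect(
    host='<DATABASE_HOST>',
    database='SchoolDB',
    user='<DATABASE_USERNAME>',
    password='<DATABASE_PASSWORD>',
    cursorclass=pymysql.cursors.DictCursor
)
try:
    with connection.cursor() as cursor:
        # Students enrolled in a course, with course name and enrollment date.
        cursor.execute("""
            SELECT s.FirstName, s.LastName, c.CourseName, e.EnrollmentDate
            FROM Enrollments e
            JOIN Students s ON s.StudentID = e.StudentID
            JOIN Courses c ON c.CourseID = e.CourseID
            ORDER BY e.EnrollmentDate
        """)
        for row in cursor.fetchall():
            print(row)
finally:
    connection.close()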
Step 3: Use the following code for the Lambda function. Replace <S3_BUCKET_NAME>, <DATABASE_HOST>, <DATABASE_USERNAME> and <DATABASE_PASSWORD> with your actual values.
import json
import boto3
import urllib3
import base64
import re
import os
import pymysql
import csv
from pymysql import Error
from botocore.response import StreamingBody
from urllib.parse import parse_qs
from io import StringIO
from datetime import date, datetime
bedrock = boto3.client(service_name='bedrock-runtime')
s3 = boto3.client('s3')
# AWS S3 configuration
s3_bucket_name = '<S3_BUCKET_NAME>'
s3_file_name = 'data.csv'
http = urllib3.PoolManager()
html_code = '''
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>ChatBox</title>
</head>
<body>
<form action="/" method="POST" style="float: left;">
<label for="message">Enter your message:</label>
<br>
<textarea id="message" name="message" rows="30" cols="100">ReplaceMesgHere</textarea>
<br>
<input type="submit" value="Submit">
</form>
<div style="float:left;width:300px;height:300px;margin-left: 40px;">
<h3>Results</h3>
<p>ResHere</p>
</div>
</body>
</html>
'''
prompt = '''
Here are my MySQL tables:\n\n
```
CREATE TABLE Students (
StudentID INT AUTO_INCREMENT PRIMARY KEY,
FirstName VARCHAR(50),
LastName VARCHAR(50),
BirthDate DATE
);
```
\n\n
```
CREATE TABLE Courses (
CourseID INT AUTO_INCREMENT PRIMARY KEY,
CourseName VARCHAR(100),
Credits INT
);
```
\n\n
```
CREATE TABLE Professors (
ProfessorID INT AUTO_INCREMENT PRIMARY KEY,
FirstName VARCHAR(50),
LastName VARCHAR(50),
Department VARCHAR(50)
);
```
\n\n
```
CREATE TABLE Enrollments (
EnrollmentID INT AUTO_INCREMENT PRIMARY KEY,
StudentID INT,
CourseID INT,
ProfessorID INT,
EnrollmentDate DATE,
FOREIGN KEY (StudentID) REFERENCES Students(StudentID),
FOREIGN KEY (CourseID) REFERENCES Courses(CourseID),
FOREIGN KEY (ProfessorID) REFERENCES Professors(ProfessorID)
);
```
\n\n
'''
endprompt = "\n\nONLY OUTPUT SQL CODE ENCLOSED IN THREE BACKTICKS."
def serialize_dates(data):
    """Recursively convert date objects to strings in a dictionary."""
    if isinstance(data, dict):
        return {k: serialize_dates(v) for k, v in data.items()}
    elif isinstance(data, list):
        return [serialize_dates(item) for item in data]
    elif isinstance(data, (date, datetime)):
        return data.isoformat()
    else:
        return data
def call_bedrock(prompt):
    body = json.dumps({
        "prompt": prompt,
        "maxTokens": 3000,
        "temperature": 0.5,
        "topP": 1,
    })
    modelId = 'ai21.j2-ultra-v1'
    accept = 'application/json'
    contentType = 'application/json'
    response = bedrock.invoke_model(body=body, modelId=modelId, accept=accept, contentType=contentType)
    if isinstance(response.get('body'), StreamingBody):
        response_content = response['body'].read().decode('utf-8')
    else:
        response_content = response.get('body')
    response_body = json.loads(response_content)
    return response_body.get('completions')[0].get('data').get('text')
def query_mysql_database(query):
    connection = None
    try:
        connection = pymysql.connect(
            host='<DATABASE_HOST>',
            database='SchoolDB',
            user='<DATABASE_USERNAME>',
            password='<DATABASE_PASSWORD>',
            cursorclass=pymysql.cursors.DictCursor
        )
        if connection.open:
            with connection.cursor() as cursor:
                cursor.execute(query)
                results = cursor.fetchall()
                return results
    except Error as e:
        print(f"Error: {e}")
        return None
    finally:
        if connection and connection.open:
            connection.close()
def lambda_handler(event, context):
    msg = ""
    results = ""
    if 'body' in event:
        # Function URLs deliver form posts base64-encoded; decode if needed.
        if event.get('isBase64Encoded', False):
            body = base64.b64decode(event['body']).decode('utf-8')
        else:
            body = event['body']
        query_params = parse_qs(body)
        # Ask Bedrock to translate the natural-language question into SQL,
        # instructing it to return the SQL enclosed in three backticks.
        msg = call_bedrock(prompt + query_params['message'][0] + endprompt)
        sql = re.findall(r'```([\s\S]*?)```', msg)
        if len(sql) > 0:
            res = query_mysql_database(sql[0].replace('\n', ' '))
            if res:
                results = serialize_dates(res)
                # Convert dictionary data to CSV and upload it to S3.
                csv_file = StringIO()
                csv_writer = csv.DictWriter(csv_file, fieldnames=results[0].keys())
                csv_writer.writeheader()
                csv_writer.writerows(results)
                csv_file.seek(0)
                s3.put_object(Bucket=s3_bucket_name, Key=s3_file_name, Body=csv_file.getvalue())
                print(f"CSV file successfully uploaded to s3://{s3_bucket_name}/{s3_file_name}")
    return {
        'statusCode': 200,
        'headers': {
            'Content-Type': 'text/html',
        },
        'body': html_code.replace('ReplaceMesgHere', msg).replace('ResHere', json.dumps(results))
    }
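You can also exercise the handler locally before deploying by handing it an event shaped like what the function URL delivers. A minimal sketch, assuming the code above is saved as lambda_function.py (only the body and isBase64Encoded fields are filled in, since those are all the handler reads):

import base64
from urllib.parse import urlencode

import lambda_function  # assumed module name for the code above

# Simulate the base64-encoded form POST that a Lambda function URL delivers.
form_body = urlencode({'message': 'Get students details who are enrolled in a course '
                                  'along with course name and enrollment date'})
event = {
    'body': base64.b64encode(form_body.encode('utf-8')).decode('utf-8'),
    'isBase64Encoded': True,
}
response = lambda_function.lambda_handler(event, None)
print(response['statusCode'])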
Step 4: Access the Lambda function URL and fire a query:
Get students details who are enrolled in a course along with course name and enrollment date
and you will see a response like this.
Now go to the S3 bucket and download the data.csv file; it will look like this.
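If you would rather pull the file down with a script than through the console, a minimal boto3 sketch (the bucket name is the same placeholder used in the Lambda code):

import boto3

s3 = boto3.client('s3')
# Download the CSV object written by the Lambda function into the current directory.
s3.download_file('<S3_BUCKET_NAME>', 'data.csv', 'data.csv')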