Skip to content

Commit 2c487c2

Browse files
tag-chat (#902)
* tag-chat * Moved to exsisting dir
1 parent eeeebcf commit 2c487c2

File tree

6 files changed

+911
-0
lines changed

6 files changed

+911
-0
lines changed
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
# Timeseries Data Generator - Expanded Version
2+
import os
3+
import numpy as np
4+
import pandas as pd
5+
from crate import client
6+
from dotenv import load_dotenv
7+
from datetime import datetime, timedelta
8+
9+
# === Load environment variables ===
load_dotenv()

# Connection settings come from the environment (optionally via a .env file).
# Host, port and schema fall back to the values of a stock local CrateDB
# node, so a minimal .env no longer yields "https://None:None" or a table
# named "None.motor_readings". User and password stay optional (None).
CRATEDB_HOST = os.getenv("CRATEDB_HOST", "localhost")
CRATEDB_PORT = os.getenv("CRATEDB_PORT", "4200")
CRATEDB_USER = os.getenv("CRATEDB_USER")
CRATEDB_PASSWORD = os.getenv("CRATEDB_PASSWORD")
CRATEDB_SCHEMA = os.getenv("CRATEDB_SCHEMA", "doc")

# ----- PARAMETERS -----
NUM_MACHINES = 10
DAYS = 30
FREQ = "15min"  # 15-minute intervals

# Calculate number of readings:
# 96 readings/day * 30 days * 10 machines = 28,800 with the values above.
total_readings = int((24 * 60 / 15) * DAYS * NUM_MACHINES)
25+
26+
# ----- DATA GENERATION -----
27+
def generate_timeseries_data():
    """Generate synthetic sensor readings for every simulated machine.

    For each of the ``NUM_MACHINES`` machines a series covering the last
    ``DAYS`` days at 15-minute spacing is produced. Vibration, temperature
    and rotations are sine/cosine curves with Gaussian noise. With a
    probability of 0.7 a machine additionally receives one anomaly: five
    consecutive readings with raised vibration and temperature.

    Returns:
        pandas.DataFrame: columns ``timestamp``, ``vibration``,
        ``temperature``, ``rotations`` and ``machine_id``. The per-machine
        frames are concatenated as-is, so index labels repeat per machine.
    """
    columns = ["timestamp", "vibration", "temperature", "rotations", "machine_id"]
    interval_minutes = 15  # spacing of the readings; FREQ expresses the same step
    length = int((24 * 60 / interval_minutes) * DAYS)

    # The time axis is identical for every machine, so build it once
    # instead of once per loop iteration.
    now = datetime.now()
    start = pd.Timestamp(now - timedelta(minutes=interval_minutes * length))
    timestamps = pd.date_range(start, periods=length, freq=FREQ)

    anomaly_margin = 100  # keep anomalies away from both ends of the series
    anomaly_length = 5    # number of consecutive readings affected

    dfs = []
    for i in range(NUM_MACHINES):
        # Generate data
        vibration = 0.75 + 0.25 * np.sin(np.linspace(0, 20, length)) + np.random.normal(0, 0.05, length)
        temperature = 50 + 10 * np.sin(np.linspace(0, 10, length)) + np.random.normal(0, 1, length)
        rotations = 1500 + 100 * np.cos(np.linspace(0, 15, length)) + np.random.normal(0, 10, length)

        # Inject anomalies. randint(low, high) needs low < high, so short
        # series (length <= 2 * margin, i.e. DAYS <= 2) are left untouched
        # instead of raising ValueError.
        if length > 2 * anomaly_margin and np.random.rand() > 0.3:
            anomaly_index = np.random.randint(anomaly_margin, length - anomaly_margin)
            window = slice(anomaly_index, anomaly_index + anomaly_length)
            vibration[window] += np.random.uniform(1, 2)
            temperature[window] += np.random.uniform(10, 20)

        dfs.append(pd.DataFrame({
            "timestamp": timestamps,
            "vibration": vibration,
            "temperature": temperature,
            "rotations": rotations,
            "machine_id": i,
        }))

    # pd.concat raises on an empty list; return an empty frame with the
    # expected columns when no machines are configured.
    if not dfs:
        return pd.DataFrame(columns=columns)
    return pd.concat(dfs)
56+
57+
58+
# ----- STORE IN CRATEDB -----
59+
def store_in_cratedb(df):
    """Create the ``motor_readings`` table if needed and bulk-insert ``df``.

    Args:
        df: DataFrame with the columns ``machine_id``, ``timestamp``,
            ``vibration``, ``temperature`` and ``rotations``.

    The URL scheme defaults to ``https``. Set the environment variable
    ``CRATEDB_SCHEME=http`` for a node without TLS, such as a plain local
    Docker container. The connection is always closed, also when a
    statement fails.
    """
    scheme = os.getenv("CRATEDB_SCHEME", "https")
    connection = client.connect(
        f"{scheme}://{CRATEDB_HOST}:{CRATEDB_PORT}",
        username=CRATEDB_USER,
        password=CRATEDB_PASSWORD
    )
    try:
        cursor = connection.cursor()

        # Create table if not exists
        cursor.execute(f"""
            CREATE TABLE IF NOT EXISTS {CRATEDB_SCHEMA}.motor_readings (
                machine_id INTEGER,
                timestamp TIMESTAMP,
                vibration DOUBLE,
                temperature DOUBLE,
                rotations DOUBLE
            )
        """)

        # Convert to plain Python values; itertuples avoids building a
        # Series per row the way iterrows does.
        data = [
            (
                int(row.machine_id),
                row.timestamp.to_pydatetime(),
                float(row.vibration),
                float(row.temperature),
                float(row.rotations),
            )
            for row in df.itertuples(index=False)
        ]

        # Insert data. An empty frame is skipped: there is nothing to bind
        # to the placeholders.
        results = []
        if data:
            results = cursor.executemany(f"""
                INSERT INTO {CRATEDB_SCHEMA}.motor_readings
                (machine_id, timestamp, vibration, temperature, rotations)
                VALUES (?, ?, ?, ?, ?)
            """, data)

        # Bulk operations do not raise for individual rows; a rejected row
        # is reported with rowcount -2 in the per-row result list.
        failed = sum(
            1 for result in (results or [])
            if isinstance(result, dict) and result.get("rowcount") == -2
        )
    finally:
        connection.close()

    if failed:
        print(f"⚠️ {failed} of {len(data)} rows were rejected by CrateDB.")
    print(f"✅ Stored {len(data) - failed} rows in CrateDB.")
87+
88+
89+
# ----- MAIN -----
90+
if __name__ == "__main__":
    # Build the synthetic data set and show a short preview of it.
    readings = generate_timeseries_data()
    print(readings.head())

    # Persist all rows, then report how many were generated.
    store_in_cratedb(readings)

    print(f"Total generated readings: {len(readings)}")
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
import os
2+
import random
3+
from dotenv import load_dotenv
4+
from crate import client
5+
6+
# === Load environment variables ===
load_dotenv()

# Connection settings come from the environment (optionally via a .env file).
# Host, port and schema fall back to the values of a stock local CrateDB
# node, so a minimal .env no longer yields "https://None:None" or a table
# named "None.machine_manuals". User and password stay optional (None).
CRATEDB_HOST = os.getenv("CRATEDB_HOST", "localhost")
CRATEDB_PORT = os.getenv("CRATEDB_PORT", "4200")
CRATEDB_USER = os.getenv("CRATEDB_USER")
CRATEDB_PASSWORD = os.getenv("CRATEDB_PASSWORD")
CRATEDB_SCHEMA = os.getenv("CRATEDB_SCHEMA", "doc")
14+
15+
# === Fictional Manual Generator ===
# Pools from which each manual's manufacturer, model and installation year
# are drawn at random.
brands = ["AtlasTech", "RotoFlow", "MechAxis", "IndustraCore"]
models = ["VX100", "MX200", "TQ350", "RG450"]
year_range = list(range(2017, 2023))


def generate_manual(machine_id):
    """Return a fictional Markdown manual for one machine.

    Manufacturer, model, installation year and the operational limits are
    drawn with the ``random`` module, so repeated calls for the same
    ``machine_id`` give different manuals unless the generator is seeded.

    Args:
        machine_id: Identifier shown in the manual's title line.

    Returns:
        str: The manual text without leading or trailing whitespace.
    """
    brand = random.choice(brands)
    model = random.choice(models)
    year = random.choice(year_range)

    vib_max = round(random.uniform(1.2, 1.6), 2)
    temp_max = round(random.uniform(65, 75), 1)
    rpm_max = random.randint(1550, 1650)

    # Emergency thresholds sit a fixed margin above the normal limits.
    # Round after adding: plain float addition prints values such as
    # 1.5999999999999999 (1.4 + 0.2) in the manual text.
    vib_emergency = round(vib_max + 0.2, 2)
    temp_emergency = round(temp_max + 5, 1)

    content = f"""
🛠️ Machine Manual — ID: {machine_id}

**Manufacturer:** {brand}
**Model:** {model}
**Year of Installation:** {year}

---

**Operational Limits:**
- Max Vibration: {vib_max} units
- Max Temperature: {temp_max}°C
- Max RPM: {rpm_max} rotations/min

**Anomaly Detection:**
- Vibration > {vib_max} may indicate imbalance or bearing issues
- Temperature > {temp_max} may suggest overheating
- RPM deviations > ±100 RPM require inspection

---

**Maintenance Schedule:**
- Weekly: Inspect vibration and temperature logs
- Monthly: Lubricate bearings and check alignment
- Quarterly: Full motor calibration and safety check

**Emergency Protocol:**
If vibration exceeds {vib_emergency} or temperature exceeds {temp_emergency}:
1. Immediately reduce load
2. Shut down the motor if anomaly persists for >5 mins
3. Notify operations lead and schedule maintenance

---

**Contact:**
- Support: support@{brand.lower()}.com
- Manual Version: 1.0
"""
    return content.strip()
69+
70+
71+
def store_manuals_in_cratedb():
    """Generate a manual for every known machine and upsert it into CrateDB.

    Creates the ``machine_manuals`` table if needed, reads the distinct
    ``machine_id`` values from ``motor_readings`` and writes one generated
    manual per machine, replacing any manual already stored for that ID.

    The URL scheme defaults to ``https``. Set the environment variable
    ``CRATEDB_SCHEME=http`` for a node without TLS, such as a plain local
    Docker container. The connection is always closed, also when a
    statement fails.
    """
    scheme = os.getenv("CRATEDB_SCHEME", "https")
    connection = client.connect(
        f"{scheme}://{CRATEDB_HOST}:{CRATEDB_PORT}",
        username=CRATEDB_USER,
        password=CRATEDB_PASSWORD
    )
    try:
        cursor = connection.cursor()

        # Create table if not exists
        cursor.execute(f"""
            CREATE TABLE IF NOT EXISTS {CRATEDB_SCHEMA}.machine_manuals (
                machine_id INTEGER PRIMARY KEY,
                manual TEXT
            )
        """)

        # Get unique machine IDs from motor_readings
        cursor.execute(f"""
            SELECT DISTINCT machine_id FROM {CRATEDB_SCHEMA}.motor_readings ORDER BY machine_id
        """)
        machine_ids = [row[0] for row in cursor.fetchall()]

        print(f"🔍 Found {len(machine_ids)} unique machine IDs.")

        # Without readings there is nothing to document; do not claim
        # that manuals were stored.
        if not machine_ids:
            print("⚠️ No machine IDs found in motor_readings; no manuals were stored.")
            return

        # Upsert manuals: the primary key on machine_id makes a second run
        # replace the existing manual instead of failing.
        for machine_id in machine_ids:
            manual = generate_manual(machine_id)
            cursor.execute(f"""
                INSERT INTO {CRATEDB_SCHEMA}.machine_manuals (machine_id, manual)
                VALUES (?, ?)
                ON CONFLICT (machine_id) DO UPDATE SET manual = ?
            """, (machine_id, manual, manual))
    finally:
        connection.close()

    print("✅ Fictional machine manuals stored (or updated) in CrateDB.")
106+
107+
# === Main Execution ===
108+
if __name__ == "__main__":
    # Script entry point: (re)generate and persist one manual per machine.
    store_manuals_in_cratedb()
Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
# 🛠️ Timeseries QA Assistant with CrateDB, LLMs, and Machine Manuals
2+
3+
This project provides a full interactive pipeline for simulating telemetry data from industrial motors, storing that data in CrateDB, and enabling natural-language querying powered by OpenAI — including RAG-style guidance from machine manuals.
4+
5+
---
6+
7+
## 📦 Features
8+
9+
- **Synthetic Data Generation** for 10 industrial motors over 30 days
10+
- **CrateDB Integration** for timeseries storage and fast querying
11+
- **Fictional Machine Manuals** linked to each machine for RAG-style enrichment
12+
- **LLM-Powered Chat Interface** with context-aware SQL generation
13+
- **Emergency Protocol Suggestions** based on detected anomalies
14+
15+
---
16+
17+
## 🚀 Setup & Installation
18+
19+
1. **Install dependencies**
20+
21+
```bash
22+
pip install -r requirements.txt
23+
```
24+
25+
2. **Create a .env file in the root directory**
26+
27+
``` bash
28+
OPENAI_API_KEY=your_openai_api_key
29+
CRATEDB_HOST=localhost
30+
CRATEDB_PORT=4200
31+
```
32+
33+
3. **Ensure CrateDB is running locally (or adapt host/port to remote)**
34+
You can use docker-compose with this `docker-compose.yml`
35+
``` yaml
36+
version: "3.9"
37+
services:
38+
cratedb:
39+
container_name: cratedb-chatbot
40+
image: crate
41+
ports:
42+
- "4200:4200"
43+
- "5432:5432"
44+
environment:
45+
- CRATE_HEAP_SIZE=1g
46+
deploy:
47+
replicas: 1
48+
restart_policy:
49+
condition: on-failure
50+
```
51+
52+
Run docker-compose:
53+
``` bash
54+
docker-compose pull
55+
docker-compose up -d
56+
```
57+
58+
59+
## Pipeline overview
60+
61+
1. **Generate Timeseries Data**
62+
63+
Creates realistic vibration, temperature, and rotation logs every 15 minutes for 10 machines.
64+
65+
``` bash
66+
python DataGenerator.py
67+
```
68+
Output should look like:
69+
70+
``` bash
71+
timestamp vibration temperature rotations machine_id
72+
0 2025-03-09 10:29:35.015476 0.751030 48.971560 1609.573066 0
73+
1 2025-03-09 10:44:35.015476 0.774157 49.696297 1601.617712 0
74+
2 2025-03-09 10:59:35.015476 0.709293 49.308419 1603.563044 0
75+
3 2025-03-09 11:14:35.015476 0.817229 51.463994 1586.055485 0
76+
4 2025-03-09 11:29:35.015476 0.795769 49.277951 1596.797612 0
77+
✅ Stored 28800 rows in CrateDB.
78+
Total generated readings: 28800
79+
```
80+
81+
2. **Generate & Store Machine Manuals**
82+
83+
Populates machine_manuals with fictional documentation for each machine, including:
84+
• Operational limits
85+
• Anomaly detection triggers
86+
• Emergency protocols
87+
• Maintenance schedules
88+
89+
``` bash
90+
python Generate-Manuals.py
91+
```
92+
93+
Output:
94+
``` bash
95+
✅ Fictional machine manuals stored (or updated) in CrateDB.
96+
```
97+
98+
3. **Run the Q&A Assistant**
99+
100+
Launch the interactive assistant:
101+
102+
``` bash
103+
python tag-motor-chat.py
104+
```
105+
106+
Example output:
107+
108+
``` bash
109+
Timeseries Q&A Assistant (type 'exit' to quit)
110+
111+
Example Questions:
112+
• What is the average temperature when vibration > 1.5?
113+
• What is the average temperature when vibration > 1.5 for motor 5?
114+
• How many anomalies happened last week?
115+
• What was the time of highest vibration for each machine?
116+
• What should I do if machine 2 has an anomaly?
117+
• What does the maintenance plan for machine 1 look like?
118+
119+
Data Overview:
120+
- Total readings: 1000
121+
- Time range: 2025-04-07 11:29:35 to 2025-04-08 12:14:35
122+
- Machines: [9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
123+
- Vibration range: 0.76 to 1.11
124+
- Temperature range: 40.69°C to 50.27°C
125+
- Rotations range: 1402 RPM to 1492 RPM
126+
- Anomalies (vibration > 1.5): 0
127+
```
128+
129+
## Supported Queries
130+
Try natural language prompts like:
131+
"Show top 5 vibration events last month"
132+
"When was the last anomaly for each machine?"
133+
"What should I do if machine 3 has an anomaly?" → Triggers manual-based response
134+
"How many anomalies occurred between March 10 and March 15?"
135+
136+
## How It Works
137+
• The assistant uses OpenAI GPT-3.5 to translate your question into SQL.
138+
• SQL is executed directly on CrateDB, pulling up real telemetry.
139+
• If anomalies (vibration > 1.5) are found, it retrieves relevant manual sections.
140+
• All results are summarized and explained naturally.
141+
142+
143+
## Architecture
144+
+--------------------------+
145+
| DataGenerator.py       |
146+
| → motor_readings (SQL) |
147+
+--------------------------+
148+
149+
+------------------------+
150+
| Generate-Manuals.py |
151+
| → machine_manuals (SQL)|
152+
+------------------------+
153+
154+
+--------------------------+
155+
| tag-motor-chat.py |
156+
| - OpenAI Q&A |
157+
| - Manual-based Guidance |
158+
+--------------------------+

0 commit comments

Comments
 (0)