Download, store, and query your Outlook emails using a local LLM. Complete workflow from export to searchable AI assistant.
A private, local system that downloads, indexes, and answers questions about your Outlook email — no data ever leaves your machine.
Outlook often auto-saves PST files. Search for them:
C:\Users\[YourName]\Documents\Outlook Files\*.pst
C:\Users\[YourName]\AppData\Local\Microsoft\Outlook\*.pst
# Install readpst (macOS)
brew install readpst
# Convert PST to mbox
readpst -o ./output -m yourfile.pst
# Convert to JSON
python3 extract_emails.py yourfile.pst
Create extract_emails.py:
import email
from email import policy
import json
from pathlib import Path


def extract_emails(pst_file, output_file):
    """Parse a message file with extract_msg and dump its fields to JSON.

    Bug fixed: the original built ``msg`` but never appended anything to
    ``emails``, so the output file was always an empty list.

    NOTE(review): extract_msg parses Outlook ``.msg`` files, not raw ``.pst``
    archives — convert the PST with ``readpst`` first, or feed individual
    exported messages here. Confirm against your export workflow.
    """
    # Third-party dependency; imported lazily so the module loads without it.
    import extract_msg

    emails = []
    msg = extract_msg.Message(pst_file)
    # Collect subject, from, to, date, body, and attachment names.
    emails.append({
        'subject': msg.subject,
        'from': msg.sender,
        'to': msg.to,
        'date': str(msg.date),
        'body': msg.body,
        'attachments': [a.longFilename or a.shortFilename
                        for a in msg.attachments],
    })
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(emails, f, indent=2, ensure_ascii=False)


if __name__ == '__main__':
    # Point this at your exported file.
    extract_emails('inbox.pst', 'emails.json')
pip install extract-msg   # the stdlib 'email' module needs no install
# Quick extract
python3 -c "import extract_msg; print(extract_msg.Message('email.msg').subject)"
pip install langchain langchain-community chromadb beautifulsoup4
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import OllamaEmbeddings
# Consistency fix: import Chroma from langchain_community like the
# embeddings above; the bare `langchain.vectorstores` path is deprecated.
from langchain_community.vectorstores import Chroma
import json

# Load the emails produced by extract_emails.py.
with open('emails.json', encoding='utf-8') as f:
    emails = json.load(f)

# Flatten each email into one text document with its headers inline, so
# retrieval can match on sender/subject as well as body text.
documents = []
for msg in emails:  # 'msg', not 'email': avoid shadowing the stdlib module
    documents.append(
        f"From: {msg['from']}\nTo: {msg['to']}\n"
        f"Subject: {msg['subject']}\nDate: {msg['date']}\n\n{msg['body']}"
    )

# Split into ~1000-character chunks so each embedding stays focused.
splitter = RecursiveCharacterTextSplitter(chunk_size=1000)
chunks = splitter.create_documents(documents)

# Embed locally via Ollama — no data leaves the machine.
embeddings = OllamaEmbeddings(model='nomic-embed-text')

# Persist the vector store on disk for later querying.
db = Chroma.from_documents(chunks, embeddings, persist_directory='./email_db')
db.persist()
# Install local-ai
docker run -ti --rm -p 8080:8080 -v $(pwd)/models:/build/models ghcr.io/mudler/local-ai:latest
# Use embed API
curl http://localhost:8080/v1/embeddings -d '{
"input": "your email text here",
"model": "nomic-embed-text"
}'
Use Open WebUI with Ollama
# Run Ollama
ollama serve
ollama pull llama3.2
# Run Open WebUI
docker run -d -p 3000:8080 -e OLLAMA_BASE_URL=http://host.docker.internal:11434 ghcr.io/open-webui/open-webui:main
Then configure it to use your ChromaDB as knowledge base.
pip install streamlit langchain langchain-community
# Create app.py
import streamlit as st
from langchain_community.chat_models import ChatOllama
from langchain.chains import ConversationalRetrievalChain
# Consistency fix: use the langchain_community paths, matching the
# ChatOllama import above; the bare `langchain.*` paths are deprecated.
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import OllamaEmbeddings

# Fixed mojibake in the title (was a mangled emoji byte sequence).
st.title("📧 Email Assistant")

# Open the vector store persisted by the indexing script.
embeddings = OllamaEmbeddings(model='nomic-embed-text')
db = Chroma(persist_directory='./email_db', embedding_function=embeddings)

query = st.text_input("Ask about your emails:")
if query:
    docs = db.similarity_search(query)
    # Bug fixed: docs[0] raised IndexError when nothing matched, and
    # silently dropped every hit after the first.
    if not docs:
        st.info("No matching emails found.")
    for doc in docs:
        st.write(doc.page_content)
Use Obsidian + Local LLM plugin
#!/bin/bash
# sync_emails.sh — refresh the local email vector DB.
# Bug fixed: the shebang must be the FIRST line of the file; it was
# previously on line 2 and therefore ignored by the kernel.
# Fail fast so a broken export never reports success.
set -euo pipefail

# Export only emails added since the last sync.
python3 extract_emails.py --incremental

# Re-embed and store the new emails.
python3 update_vector_db.py

echo "Email DB updated!"
# Run every Sunday at 2am
0 2 * * 0 /path/to/sync_emails.sh >> /path/to/sync.log 2>&1
Only index emails newer than last sync date:
from datetime import datetime

# Bug fixed: JSON stores the 'date' field as an ISO-8601 string, and
# comparing str > datetime raises TypeError — parse each date first.
from_date = datetime.fromisoformat('2026-01-01')
new_emails = [e for e in all_emails
              if datetime.fromisoformat(e['date']) > from_date]
# Only embed new emails
| Solution | Type | Link |
|---|---|---|
| MailPilot | Full stack | GitHub |
| OutlookLLM | Outlook Add-in | GitHub |
| Email Assistant | RAG template | GitHub |