Obsidian with Local API and Firefox Extension
I will split this post into three sections:
- Obsidian Plugin (Covered in another article: https://webuxlab.com/en/projects/obsidian-plugin)
- Small Local API in NodeJS
- Firefox Extension
First Section - Obsidian Plugin
The goal of this plugin is to provide an endpoint that receives base64-encoded JSON data and saves it in a page.
Please see: https://webuxlab.com/en/projects/obsidian-plugin
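To give an idea of the payload format, here is a minimal sketch of how the local API (shown in the next section) encodes data before handing it off; the `obsidian://endpoint-local` URI and the array-of-objects payload come from the API code, while the field values and the file name are only illustrative.
encode-example.js
// Minimal sketch: encode a JSON payload the way the local API does
// before opening the obsidian:// URI handled by the plugin.
const payload = [{ url: "https://example.com", foo: "bar" }]; // illustrative data
const encoded = Buffer.from(JSON.stringify(payload)).toString("base64");
console.log(`obsidian://endpoint-local/?data=${encoded}`);
// The plugin presumably reverses this with something like:
// JSON.parse(Buffer.from(encoded, "base64").toString("utf-8"))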
Second Section - The Local API
I am familiar with Puppeteer, so it is what I reach for whenever I need to do some crawling.
index.js
const express = require("express");
const app = express();
const processing = require("./processing");
const cors = require("cors");
const { open } = require("lmdb");
const Queue = require("better-queue");
const openBrowser = require("open");

// Queue limiting how many URLs are crawled at the same time.
const q = new Queue(doStuff, {
  concurrent: 2,
  maxRetries: 3,
  retryDelay: 5000,
  maxTimeout: 60000,
});

const hostname = "127.0.0.1";
const port = 3000;

// Local LMDB store for the crawled entries.
let myDB = open({
  path: "entries",
  // any options go here, we can turn on compression like this:
  compression: true,
});

app.use(cors());
app.use(express.json());

// Queue worker: crawl the URL, persist the result, then hand it back.
async function doStuff(input, cb) {
  const data = await processing(input);
  await myDB.put(data.centrisNo || new Date().toISOString(), { ...data });
  return cb(null, { input, data });
}

app.get("/status", async (req, res) => {
  res.send({
    queueInfo: q.getStats(),
    info: null,
  });
});

app.post("/", async (req, res) => {
  try {
    res.setHeader("Access-Control-Allow-Origin", "*");
    res.setHeader("Access-Control-Request-Method", "*");
    res.setHeader("Access-Control-Allow-Methods", "OPTIONS, POST");
    res.setHeader("Access-Control-Allow-Headers", "*");
    if (req.method === "OPTIONS") {
      res.writeHead(200);
      res.end();
      return;
    }

    const urls = req.body.datas;
    if (!urls || urls.length === 0) throw new Error("No URL provided.");

    urls.map((url) =>
      q
        .push(url)
        .on("finish", async function ({ input, data }) {
          console.log("Finished", input);
          delete data.errors;
          // Hand the result to Obsidian through the custom URI handler.
          await openBrowser(
            `obsidian://endpoint-local/?data=${Buffer.from(
              JSON.stringify([data])
            ).toString("base64")}`,
            { background: true }
          );
        })
        .on("failed", function (err) {
          console.error("Error:", err);
        })
    );

    res.send("Thank You!");
  } catch (e) {
    console.error(e.message);
    console.error(e.stack);
    res.send("Oops");
  }
});

app.listen(port, () => {
  console.log(`Server running at http://${hostname}:${port}/`);
});
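As a side note, the entries written by `doStuff` can be read back from the same LMDB database later; a minimal sketch, assuming the `entries` store created above (the file name is only illustrative):
read-entries.js
// Minimal sketch: iterate over everything saved by the API in the "entries" store.
const { open } = require("lmdb");

const myDB = open({ path: "entries", compression: true });

for (const { key, value } of myDB.getRange()) {
  console.log(key, value.url, value.datetime);
}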
processing.js
const puppeteer = require("puppeteer");

module.exports = async (url) => {
  const browser = await puppeteer.launch({
    headless: true,
    timeout: 60000,
  });
  const page = await browser.newPage();

  console.log(`Working on it... (${url})`);

  const data = { errors: [], datetime: new Date() };

  try {
    page.setDefaultTimeout(2000);
    await page.setUserAgent("SET_YOUR_USER_AGENT");
    await page.goto(url, { timeout: 20000 });

    // Set screen size
    await page.setViewport({ width: 1512, height: 2050 });

    // TODO: Add your puppeteer commands and set the result in the data variable
    data.url = url;
    data.foo = "bar";
    //...
  } catch (e) {
    console.log(e.stack);
    data.errors.push(
      `${url} - Element might not be found for - '${e.message}'`
    );
  } finally {
    await browser.close();
  }

  return data;
};
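To make the TODO more concrete, here is a hypothetical stand-alone example of the kind of Puppeteer commands that could go there; the URL, the `h1` selector, and the field names are made up, so adapt them to the page you crawl.
scrape-example.js
// Hypothetical example of the kind of extraction that could replace the TODO
// in processing.js: grab the title and the first <h1> of a page.
const puppeteer = require("puppeteer");

(async () => {
  const browser = await puppeteer.launch({ headless: true });
  const page = await browser.newPage();
  await page.goto("https://example.com", { timeout: 20000 });

  const data = {
    url: page.url(),
    title: await page.title(),
    // $eval throws if the selector is missing, hence the catch.
    heading: await page.$eval("h1", (el) => el.textContent.trim()).catch(() => null),
  };

  console.log(data);
  await browser.close();
})();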
package.json
{
  "name": "api",
  "version": "1.0.0",
  "description": "",
  "main": "index.js",
  "scripts": {
    "start": "node index.js"
  },
  "author": "Studio Webux",
  "license": "MIT",
  "dependencies": {
    "better-queue": "^3.8.12",
    "cors": "^2.8.5",
    "express": "^4.18.2",
    "lmdb": "^2.7.9",
    "open": "^8.4.0",
    "puppeteer": "^19.6.3"
  }
}
- I use `better-queue` to control the quantity of requests to process; my local computer is limited, so I need an easy way to control Puppeteer's concurrency. If you are familiar with AWS, this is similar to SQS.
- `cors` and `express` go together; they simply offer an API endpoint (a quick way to test it is sketched after this list).
- `lmdb` is completely optional; I was wondering if a local NoSQL database was out there. For the small amount of testing I did with it, it works great! Lightweight and no setup, perfect for my small projects and experimentations.
- `puppeteer` is the tool used to crawl the web page.
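With the API running locally (`npm start`), here is a quick way to test the endpoint before wiring up the extension; a minimal sketch, assuming Node 18+ for the built-in `fetch` and `https://example.com` as a placeholder URL.
test-api.js
// Minimal sketch: push one URL into the local collector and check the queue.
// Assumes the server from index.js is running on port 3000.
(async () => {
  const response = await fetch("http://localhost:3000/", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ datas: ["https://example.com"] }),
  });
  console.log(await response.text()); // "Thank You!"

  // The /status endpoint exposes the better-queue statistics.
  const status = await fetch("http://localhost:3000/status");
  console.log(await status.json());
})();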
Third Section - Firefox Extension
I'm far from an expert; I have only built two of them so far.
I won't cover all the details, but I do recommend that you read the official documentation. It is quite easy to understand.
background.js
// Send the given URL to the local collector API configured in storage.
function postData(url = "") {
  return browser.storage.sync.get("endpoint").then(
    async function (data) {
      console.log(data);
      const response = await fetch(data.endpoint, {
        method: "POST",
        mode: "cors",
        cache: "no-cache",
        credentials: "omit",
        headers: { "Content-Type": "application/json" },
        redirect: "follow",
        referrerPolicy: "no-referrer",
        body: JSON.stringify({
          datas: [url],
        }),
      });
      return response.text();
    },
    function (error) {
      console.error(error);
      throw new Error(error);
    }
  );
}

// Handle messages coming from the popup (logic.js).
function handleMessage(message) {
  if (message.url) {
    return postData(message.url).then(function () {
      return true;
    });
  } else {
    throw new Error("Missing a URL");
  }
}

browser.runtime.onMessage.addListener(handleMessage);

// Re-inject logic.js when a page updates its history state (single-page apps).
browser.webNavigation.onHistoryStateUpdated.addListener(
  function () {
    browser.tabs.executeScript(null, { file: "logic.js" });
  },
  {
    url: [{ originAndPathMatches: "^.+://.*/.+/.+$" }],
  }
);
logic.js
// Default endpoint of the local API.
browser.storage.sync.set({
  endpoint: "http://localhost:3000",
});

document.addEventListener("DOMContentLoaded", async function () {
  document
    .getElementById("fetchBtn")
    .addEventListener("click", async function () {
      // Grab the active tab and ask the background script to send its URL to the API.
      browser.tabs.query({ currentWindow: true, active: true }).then((tabs) => {
        browser.runtime
          .sendMessage({ url: tabs[0].url })
          .then(function () {
            let done = document.createElement("p");
            done.textContent = "Data sent to Collector with success";
            done.setAttribute("class", "alert alert-success mt-3 mb-2");
            document.body.appendChild(done);
          })
          .catch(function (e) {
            let failed = document.createElement("p");
            failed.textContent = "An error has occurred, " + e.message;
            failed.setAttribute("class", "alert alert-danger mt-3 mb-2");
            document.body.appendChild(failed);
          });
      });
    });
});
popup.html
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>Crawl URL</title>
    <script src="./logic.js"></script>
    <link
      rel="stylesheet"
      href="https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css"
      integrity="sha384-B0vP5xmATw1+K9KRQjQERJvTumQW0nPEzvF6L/Z6nronJ3oUOFUFpCjEUQouq2+l"
      crossorigin="anonymous"
    />
  </head>
  <body class="p-3">
    <button id="fetchBtn" class="btn btn-secondary">Fetch</button>
  </body>
</html>
manifest.json
{
  "manifest_version": 2,
  "name": "TODO",
  "version": "1.0",
  "browser_specific_settings": {
    "gecko": {
      "id": "TODO",
      "strict_min_version": "93.0"
    }
  },
  "description": "TODO",
  "icons": {
    "128": "icons/logoAmpoule_142.png"
  },
  "browser_action": {
    "default_popup": "popup.html",
    "browser_style": true
  },
  "permissions": ["tabs", "webNavigation", "storage"],
  "background": {
    "scripts": ["background.js"]
  }
}
Conclusion
This is a POC to validate the infinite power of Obsidian!
I am more than happy with what is possible with this editor; we can create and adapt a bunch of flows together to extend it to our custom needs.