Add csv data loader example (#179)

bennetthardwick · web-flow · commit c34b1913aba5 · 2025-08-15T15:09:33.000+10:00
### Changelog
<!-- Write a one-sentence summary of the user-impacting change (API,
UI/UX, performance, etc) that could appear in a changelog. Write "None"
if there is no user-facing change -->

None

### Docs

<!-- Link to a Docs PR, tracking ticket in Linear, OR write "None" if no
documentation changes are needed. -->

None

### Description

Adds another example data loader for loading from a CSV. It uses an
index to keep track of where to start reading from. It logs each column
as a different topic and uses a "timestamp_nanos" column for the log
time.
diff --git a/examples/csv-data-loader/.gitignore b/examples/csv-data-loader/.gitignore
@@ -0,0 +1,5 @@
+dist
+node_modules
+rust/Cargo.lock
+rust/target
+package-lock.json
diff --git a/examples/csv-data-loader/.prettierrc.yaml b/examples/csv-data-loader/.prettierrc.yaml
@@ -0,0 +1,5 @@
+arrowParens: always
+printWidth: 100
+trailingComma: "all"
+tabWidth: 2
+semi: true
diff --git a/examples/csv-data-loader/README.md b/examples/csv-data-loader/README.md
@@ -0,0 +1,21 @@
+# CSV Data Loader
+
+This is a simple [Foxglove](http://foxglove.dev/) [extension](https://docs.foxglove.dev/docs/visualization/extensions) that loads a CSV file.
+The file must have a column called `timestamp_nanos` in order to be read, and its rows must be sorted by ascending timestamp.
+
+## Building
+
+Install rust with [rustup](https://www.rust-lang.org/tools/install), then install wasm32 support:
+
+```
+rustup target add wasm32-unknown-unknown
+```
+
+Then to build the rust code and generate the extension file:
+
+```
+npm install
+npm run package
+```
+
+These steps will produce a `.foxe` file you can install as an extension from the Foxglove settings page.
diff --git a/examples/csv-data-loader/config.ts b/examples/csv-data-loader/config.ts
@@ -0,0 +1,10 @@
+module.exports = {
+  webpack: (config) => {
+    // Set up Webpack to inline .wasm imports as a base64 URL
+    config.module.rules.push({
+      test: /\.wasm$/i,
+      type: "asset/inline",
+    });
+    return config;
+  },
+};
diff --git a/examples/csv-data-loader/eslint.config.js b/examples/csv-data-loader/eslint.config.js
@@ -0,0 +1,23 @@
+// @ts-check
+
+const foxglove = require("@foxglove/eslint-plugin");
+const globals = require("globals");
+const tseslint = require("typescript-eslint");
+
+module.exports = tseslint.config({
+  files: ["src/**/*.ts", "src/**/*.tsx"],
+  extends: [foxglove.configs.base, foxglove.configs.react, foxglove.configs.typescript],
+  languageOptions: {
+    globals: {
+      ...globals.es2020,
+      ...globals.browser,
+    },
+    parserOptions: {
+      project: "tsconfig.json",
+      tsconfigRootDir: __dirname,
+    },
+  },
+  rules: {
+    "react-hooks/exhaustive-deps": "error",
+  },
+});
diff --git a/examples/csv-data-loader/package.json b/examples/csv-data-loader/package.json
@@ -0,0 +1,33 @@
+{
+  "name": "csv-data-loader",
+  "displayName": "CSV Data Loader",
+  "description": "Data loader for loading CSV files. The file must have a column called 'timestamp_nanos' and its rows must be sorted by ascending timestamp.",
+  "publisher": "Foxglove",
+  "version": "1.1.0",
+  "license": "MIT",
+  "main": "./dist/extension.js",
+  "scripts": {
+    "build": "npm run build:wasm && foxglove-extension build",
+    "build:wasm": "cd rust && cargo build --release --target wasm32-unknown-unknown",
+    "foxglove:prepublish": "npm run build:wasm && foxglove-extension build --mode production",
+    "lint:ci": "eslint --report-unused-disable-directives .",
+    "lint": "eslint --report-unused-disable-directives --fix .",
+    "local-install": "foxglove-extension install",
+    "package": "foxglove-extension package",
+    "pretest": "foxglove-extension pretest"
+  },
+  "devDependencies": {
+    "@foxglove/eslint-plugin": "2.0.0",
+    "@foxglove/extension": "2.29.0",
+    "@foxglove/schemas": "1.6.4",
+    "@types/react": "18.3.12",
+    "@types/react-dom": "18.3.1",
+    "@types/wicg-file-system-access": "2023.10.6",
+    "create-foxglove-extension": "1.0.4",
+    "eslint": "9.15.0",
+    "prettier": "3.3.3",
+    "react": "18.3.1",
+    "react-dom": "18.3.1",
+    "typescript": "5.7.2"
+  }
+}
diff --git a/examples/csv-data-loader/rust/Cargo.toml b/examples/csv-data-loader/rust/Cargo.toml
@@ -0,0 +1,13 @@
+[package]
+name = "csv-foxglove-data-loader"
+version = "0.1.0"
+edition = "2024"
+
+[lib]
+crate-type = ["cdylib"]
+
+[dependencies]
+anyhow = "1.0"
+csv = "1.3.1"
+foxglove_data_loader = "0.1.0"
+serde_json = "1.0.142"
diff --git a/examples/csv-data-loader/rust/src/lib.rs b/examples/csv-data-loader/rust/src/lib.rs
@@ -0,0 +1,215 @@
+use std::{
+    collections::{BTreeMap, BTreeSet},
+    io::{Cursor, Read},
+};
+
+use foxglove_data_loader::{
+    DataLoader, DataLoaderArgs, Initialization, Message, MessageIterator, MessageIteratorArgs,
+    reader::{self},
+};
+
+use anyhow::bail;
+use csv::StringRecord;
+use serde_json::json;
+
+/// Data loader that exposes every column of a single CSV file as its own channel.
+#[derive(Default)]
+struct CsvDataLoader {
+    /// Path of the CSV file handed to the loader on construction.
+    path: String,
+    /// Lookup from a row's `timestamp_nanos` value to the byte offset at which that row starts.
+    indexes: BTreeMap<u64, u64>,
+    /// Column position of the `timestamp_nanos` field, used as the log time of each row.
+    log_time_index: usize,
+    /// Column names taken from the CSV header row.
+    keys: Vec<String>,
+}
+
+impl DataLoader for CsvDataLoader {
+    type MessageIterator = CsvMessageIterator;
+    type Error = anyhow::Error;
+
+    /// Creates a loader for the single CSV path supplied in `args`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `args.paths` does not contain exactly one entry.
+    fn new(args: DataLoaderArgs) -> Self {
+        let DataLoaderArgs { mut paths } = args;
+        assert_eq!(
+            paths.len(),
+            1,
+            "data loader is configured to only get one file"
+        );
+        Self {
+            path: paths.remove(0),
+            ..Default::default()
+        }
+    }
+
+    /// Scans the whole CSV once, building the timestamp index and describing the channels.
+    ///
+    /// Every column except `timestamp_nanos` becomes a JSON-encoded channel named `/<column>`
+    /// whose id is the column position.
+    ///
+    /// # Errors
+    ///
+    /// Fails if the CSV cannot be read, has no `timestamp_nanos` column, or contains a row whose
+    /// timestamp is not a valid `u64`.
+    fn initialize(&mut self) -> Result<Initialization, Self::Error> {
+        let mut reader = csv::ReaderBuilder::new()
+            .has_headers(true)
+            .from_reader(reader::open(&self.path));
+
+        // Read the headers of the CSV and store them on the loader.
+        // We will turn each column into a topic so the CSV needs to have a header.
+        let headers = reader.headers()?;
+        self.keys = headers.iter().map(String::from).collect();
+
+        // Read through the keys and try to find a field called "timestamp_nanos". If this doesn't
+        // exist then we can't read the file as we have no way of knowing the log time.
+        let Some(log_time_index) = self.keys.iter().position(|k| k == "timestamp_nanos") else {
+            bail!("expected csv to contain column called timestamp_nanos")
+        };
+
+        // Store the column index of the timestamp to be used for the log time.
+        self.log_time_index = log_time_index;
+
+        let mut record = StringRecord::new();
+        let mut position = reader.position().byte();
+        // Counted separately from the index: several rows may share one timestamp.
+        let mut row_count: usize = 0;
+
+        // Read the entire file to build up an index of timestamps to byte position.
+        // Later on we'll use this index to make sure we can immediately start reading from the
+        // correct place. This will take a little bit of time when the file loads for the first
+        // time, but it will mean playback is snappy later on.
+        while reader.read_record(&mut record)? {
+            let timestamp_nanos: u64 = record[log_time_index].parse()?;
+            // Keep the offset of the FIRST row carrying this timestamp so that an iterator
+            // starting here does not skip earlier rows with the same timestamp.
+            self.indexes.entry(timestamp_nanos).or_insert(position);
+            row_count += 1;
+            position = reader.position().byte();
+        }
+
+        let mut builder = Initialization::builder()
+            .start_time(
+                self.indexes
+                    .first_key_value()
+                    .map(|(timestamp, _)| *timestamp)
+                    .unwrap_or(0),
+            )
+            .end_time(
+                self.indexes
+                    .last_key_value()
+                    .map(|(timestamp, _)| *timestamp)
+                    .unwrap_or(0),
+            );
+
+        for (i, key) in self.keys.iter().enumerate() {
+            // Don't add a channel for the column used for log time
+            if i == self.log_time_index {
+                continue;
+            }
+
+            // Each data row contributes exactly one message to every channel.
+            builder
+                .add_channel_with_id(i as _, &format!("/{key}"))
+                .expect("channel is free")
+                .message_encoding("json")
+                .message_count(row_count as _);
+        }
+
+        Ok(builder.build())
+    }
+
+    /// Creates an iterator over the requested channels, starting at the first indexed row whose
+    /// timestamp is at or after `args.start_time` (or at the first row when no start is given).
+    ///
+    /// When no such row exists the returned iterator yields nothing.
+    fn create_iter(
+        &mut self,
+        args: MessageIteratorArgs,
+    ) -> Result<Self::MessageIterator, Self::Error> {
+        let requested_channel_id = args.channels.into_iter().collect();
+
+        match self.indexes.range(args.start_time.unwrap_or(0)..).next() {
+            Some((_, byte_offset)) => {
+                let reader = reader::open(&self.path);
+                reader.seek(*byte_offset);
+                let source: Box<dyn Read> = Box::new(reader);
+
+                Ok(CsvMessageIterator {
+                    row_to_flush: Default::default(),
+                    log_time_index: self.log_time_index,
+                    requested_channel_id,
+                    // The source is already positioned on a data row. Headers must be disabled,
+                    // otherwise the csv reader consumes that row as a header and drops it.
+                    reader: csv::ReaderBuilder::new()
+                        .has_headers(false)
+                        .from_reader(source),
+                })
+            }
+            // If there is no byte offset (we've gone past the last timestamp), return empty iter
+            None => Ok(CsvMessageIterator {
+                log_time_index: self.log_time_index,
+                row_to_flush: Default::default(),
+                requested_channel_id: Default::default(),
+                reader: csv::Reader::from_reader(Box::new(Cursor::new([]))),
+            }),
+        }
+    }
+}
+
+/// Streams the rows of a CSV source as messages, one message per requested column.
+struct CsvMessageIterator {
+    /// Messages built from the most recently read row that have not been returned yet.
+    row_to_flush: Vec<Message>,
+    /// Column position of the timestamp field.
+    log_time_index: usize,
+    /// Channel ids (column positions) for which messages are produced.
+    requested_channel_id: BTreeSet<u16>,
+    /// CSV reader over the boxed byte source.
+    reader: csv::Reader<Box<dyn Read>>,
+}
+
+/// Converts a raw CSV cell into the most specific JSON value it can represent.
+///
+/// The cell is tried as an `f64` first, then as a `bool`; when neither parse succeeds the
+/// original text is returned as a JSON string.
+///
+/// NOTE(review): `f64` parsing also accepts text such as `nan` and `inf` — confirm how `json!`
+/// renders non-finite numbers.
+fn to_json_value(value: &str) -> serde_json::Value {
+    value
+        .parse::<f64>()
+        .map(|number| json!(number))
+        .or_else(|_| value.parse::<bool>().map(|flag| json!(flag)))
+        .unwrap_or_else(|_| json!(value))
+}
+
+impl MessageIterator for CsvMessageIterator {
+    type Error = anyhow::Error;
+
+    /// Returns the next pending message, reading further rows when none are buffered.
+    ///
+    /// Each row is expanded into one message per requested column (the timestamp column is
+    /// never emitted); those messages are then handed out one call at a time. Yields `None`
+    /// once the reader has no more rows, and an error if a row cannot be read or its timestamp
+    /// is not a valid `u64`.
+    fn next(&mut self) -> Option<Result<Message, Self::Error>> {
+        // Keep pulling rows until one of them produces at least one message.
+        while self.row_to_flush.is_empty() {
+            let mut row = StringRecord::new();
+
+            match self.reader.read_record(&mut row) {
+                Ok(true) => {}
+                Ok(false) => return None,
+                Err(error) => return Some(Err(error.into())),
+            }
+
+            // Every message built from this row shares the row's log time.
+            let log_time = match row[self.log_time_index].parse::<u64>() {
+                Ok(nanos) => nanos,
+                Err(error) => return Some(Err(error.into())),
+            };
+
+            let log_time_index = self.log_time_index;
+            let requested = &self.requested_channel_id;
+
+            let pending = row.iter().enumerate().filter_map(move |(index, cell)| {
+                // The timestamp column is metadata, not a channel
+                if index == log_time_index {
+                    return None;
+                }
+
+                // Columns nobody asked for are dropped
+                let channel_id = index as u16;
+                if !requested.contains(&channel_id) {
+                    return None;
+                }
+
+                let data = serde_json::to_vec(&json!({ "value": to_json_value(cell) }))
+                    .expect("json will not fail to serialize");
+
+                Some(Message {
+                    channel_id,
+                    log_time,
+                    publish_time: log_time,
+                    data,
+                })
+            });
+
+            self.row_to_flush.extend(pending);
+        }
+
+        // The buffer is non-empty here; messages are handed out from its tail.
+        self.row_to_flush.pop().map(Ok)
+    }
+}
+
+foxglove_data_loader::export!(CsvDataLoader);
diff --git a/examples/csv-data-loader/src/globals.d.ts b/examples/csv-data-loader/src/globals.d.ts
@@ -0,0 +1,5 @@
+// Webpack is configured to import .wasm files as a base64 URL
+declare module "*.wasm" {
+  // Default export of a .wasm import is a string
+  const wasmDataUrl: string;
+  export default wasmDataUrl;
+}
diff --git a/examples/csv-data-loader/src/index.ts b/examples/csv-data-loader/src/index.ts
@@ -0,0 +1,12 @@
+import { Experimental } from "@foxglove/extension";
+
+// Import the .wasm file as a base64 data URL to be bundled with the extension
+import wasmUrl from "../rust/target/wasm32-unknown-unknown/release/csv_foxglove_data_loader.wasm";
+
+// Registers the bundled wasm module as a "file" data loader for files with the `.csv`
+// supported file type.
+export function activate(extensionContext: Experimental.ExtensionContext): void {
+  extensionContext.registerDataLoader({
+    type: "file",
+    wasmUrl,
+    supportedFileType: ".csv",
+  });
+}
diff --git a/examples/csv-data-loader/tsconfig.json b/examples/csv-data-loader/tsconfig.json
@@ -0,0 +1,26 @@
+{
+  "extends": "create-foxglove-extension/tsconfig/tsconfig.json",
+
+  "include": ["./src/**/*"],
+  "compilerOptions": {
+    "rootDir": "./src",
+    "outDir": "./dist",
+    "lib": ["DOM"],
+
+    // These two settings prevent typescript from emitting .d.ts files we don't need in
+    // the compiled extension.
+    "composite": false,
+    "declaration": false,
+
+    // Additional TypeScript error reporting checks are enabled by default to improve code quality.
+    // Enable/disable these checks as necessary to suit your coding preferences or work with
+    // existing code
+    "noFallthroughCasesInSwitch": true,
+    "noImplicitAny": true,
+    "noImplicitReturns": true,
+    "noUncheckedIndexedAccess": true,
+    "noUnusedLocals": true,
+    "noUnusedParameters": true,
+    "forceConsistentCasingInFileNames": true
+  }
+}