profile API
Data profiling for quality analysis and statistics. Enhanced statistics (numeric stats, histograms, outliers, category frequencies) are computed in WASM for Scale+ tiers (Scale and Enterprise).
npm install @rowops/profile
Core Functions
profileColumnsWasm
Profile columns using WASM for performance. For Scale+ tiers, enhanced statistics are computed automatically when the resolved tier gate is passed via the `tierGate` option.
import { profileColumnsWasm } from "@rowops/profile";
import { resolveBrowserLicense } from "@rowops/import-core";
// Resolve the browser license first; the tierGateInit it returns is handed to the profiler below.
const { tierGateInit } = await resolveBrowserLicense({
projectId: "proj_xxx",
entitlementToken: "eyJ...",
});
// Profile the columns contained in arrowIpcBytes (typed as a Uint8Array in the usage example).
const profiles = await profileColumnsWasm(arrowIpcBytes, {
tierGate: tierGateInit, // Tier gate from the resolved license; needed for enhanced stats
});
// For Scale+ tiers (Scale and Enterprise), profiles include:
// - numericStats: { min, max, mean, stdDev }
// - histogram: HistogramBin[]
// - outliers: OutlierStats
// - topCategories: CategoryFrequency[]
exportProfile
Export profile results as JSON.
import { exportProfile } from "@rowops/profile";
// Serialize the profile results to a JSON string.
const jsonString = exportProfile(profiles);
Types
ColumnProfile
Profile data for a single column.
/**
 * Profile data for a single column.
 *
 * The counts, `field`, and `topValues` are always present. Everything else is
 * optional and depends on the column's type or on the tier.
 */
interface ColumnProfile {
  /** Column identifier; the usage example logs it as `Column: <field>`. */
  field: string;
  /** Denominator used by the usage example when computing the fill rate. */
  totalCount: number;
  /** Numerator of the fill rate (`nonNullCount / totalCount`). */
  nonNullCount: number;
  nullCount: number;
  distinctCount: number;
  /**
   * Most frequent values with their occurrence counts.
   * NOTE(review): length is presumably capped by the per-tier "Top values" limit; confirm.
   */
  topValues: Array<{ value: string; count: number }>;

  // Numeric columns only
  minNumeric?: number;
  maxNumeric?: number;
  mean?: number;
  median?: number;
  /** Note the casing: `stddev` here, but `stdDev` on `NumericStats`. */
  stddev?: number;

  // String columns
  minLength?: number;
  maxLength?: number;

  // Type inference
  inferredType?:
    | "string"
    | "number"
    | "boolean"
    | "date"
    | "email"
    | "text";

  // Enhanced stats (Scale+ tier - computed in WASM)
  numericStats?: NumericStats;
  histogram?: HistogramBin[];
  outliers?: OutlierStats;
  topCategories?: CategoryFrequency[];
}
NumericStats
Statistics for numeric columns (Scale+ tier).
/**
 * Statistics for numeric columns (Scale+ tier).
 *
 * Exposed on `ColumnProfile.numericStats`.
 */
interface NumericStats {
  min: number;
  max: number;
  mean: number;
  /** Standard deviation. Camel-cased here, unlike `ColumnProfile.stddev`. */
  stdDev: number;
}
HistogramBin
Histogram bin for value distribution (Scale+ tier).
/**
 * Histogram bin for value distribution (Scale+ tier).
 *
 * NOTE(review): whether `max` is inclusive or exclusive is not stated here; confirm
 * against the WASM implementation.
 */
interface HistogramBin {
  /** Lower edge of the bin. */
  min: number;
  /** Upper edge of the bin. */
  max: number;
  /** Count recorded for this bin. */
  count: number;
}
OutlierStats
Outlier detection using IQR method (Scale+ tier).
/**
 * Outlier detection using IQR method (Scale+ tier).
 *
 * The two bounds are derived from the quartiles: `lowerBound` is
 * `Q1 - 1.5*IQR` and `upperBound` is `Q3 + 1.5*IQR`.
 */
interface OutlierStats {
  /** Number of outliers */
  count: number;
  /** First quartile */
  q1: number;
  /** Third quartile */
  q3: number;
  /** Interquartile range */
  iqr: number;
  /** Q1 - 1.5*IQR */
  lowerBound: number;
  /** Q3 + 1.5*IQR */
  upperBound: number;
}
CategoryFrequency
Top value frequency for string columns (Scale+ tier).
/**
 * Top value frequency for string columns (Scale+ tier).
 *
 * Exposed as the entries of `ColumnProfile.topCategories`.
 */
interface CategoryFrequency {
  /** The category value. */
  value: string;
  /** Occurrence count for `value`. */
  count: number;
  /**
   * Share of the column taken by `value`.
   * NOTE(review): the usage example prints this followed by a `%` sign, which
   * suggests a 0-100 scale rather than 0-1; confirm.
   */
  percentage: number;
}
ProfileReport
Full profiling report.
/**
 * Full profiling report.
 *
 * The importer example reads it from `result.profile` in `onComplete`.
 */
interface ProfileReport {
  /** Tier the report was produced under. */
  tier: string;
  /** NOTE(review): presumably the per-tier "Top values" limit applied to each column; confirm. */
  topN: number;
  /** One entry per profiled column. */
  profiles: Array<ColumnProfile>;
  /** NOTE(review): timestamp format is not specified here (ISO 8601?); confirm. */
  generatedAt?: string;
}
Usage Example
import { profileColumnsWasm } from "@rowops/profile";
import { resolveBrowserLicense } from "@rowops/import-core";
import type { ColumnProfile } from "@rowops/profile";
/**
 * Profiles the columns in `arrowIpcBytes` and logs a short summary of each one.
 *
 * Resolves the browser license with the example project ID and entitlement
 * token, passes the resulting tier gate to `profileColumnsWasm`, then prints
 * the field name, inferred type, fill rate and distinct count per column.
 * Enhanced statistics are printed only when they are present on the profile.
 *
 * @param arrowIpcBytes - Bytes handed unchanged to `profileColumnsWasm`.
 * @returns The profiles exactly as returned by `profileColumnsWasm`.
 */
async function analyzeData(arrowIpcBytes: Uint8Array): Promise<ColumnProfile[]> {
  const { tierGateInit } = await resolveBrowserLicense({
    projectId: "proj_xxx",
    entitlementToken: "eyJ...",
  });

  // Get profiles (with enhanced stats for Scale+ tiers)
  const profiles: ColumnProfile[] = await profileColumnsWasm(arrowIpcBytes, {
    tierGate: tierGateInit,
  });

  // Log insights
  for (const profile of profiles) {
    console.log(`Column: ${profile.field}`);
    // inferredType is optional; print a readable fallback instead of "undefined".
    console.log(` Type: ${profile.inferredType ?? "unknown"}`);

    // A column with no rows has totalCount === 0; dividing would log "NaN%".
    const fillRate =
      profile.totalCount > 0
        ? `${((profile.nonNullCount / profile.totalCount) * 100).toFixed(1)}%`
        : "n/a";
    console.log(` Fill rate: ${fillRate}`);
    console.log(` Distinct: ${profile.distinctCount}`);

    // Enhanced stats (Scale+ tier)
    if (profile.numericStats) {
      console.log(` Mean: ${profile.numericStats.mean.toFixed(2)}`);
      console.log(` Stddev: ${profile.numericStats.stdDev.toFixed(2)}`);
    }
    if (profile.outliers && profile.outliers.count > 0) {
      console.log(` Outliers: ${profile.outliers.count}`);
    }
    if (profile.histogram) {
      console.log(` Histogram: ${profile.histogram.length} bins`);
    }

    // Read the first category once so the access is checked a single time
    // (also type-safe under noUncheckedIndexedAccess).
    const topCategory = profile.topCategories?.[0];
    if (topCategory) {
      console.log(` Top category: ${topCategory.value} (${topCategory.percentage.toFixed(1)}%)`);
    }
  }

  return profiles;
}
With Importer Callback
import { RowOpsImporter } from "@rowops/importer";
<RowOpsImporter
  projectId="proj_xxx"
  schemaId="contacts"
  publishableKey="pk_xxx"
  onComplete={(result) => {
    // Nothing to inspect when the result carries no profile report.
    if (!result.profile) {
      return;
    }
    console.log("Profile report:", result.profile);

    for (const col of result.profile.profiles) {
      const { field, nonNullCount, totalCount, outliers } = col;

      // Warn about columns that are less than 90% filled.
      const fillRate = nonNullCount / totalCount;
      if (fillRate < 0.9) {
        console.warn(`Low fill rate: ${field} (${(fillRate * 100).toFixed(0)}%)`);
      }

      // Check for outliers (Scale+ tier)
      if (outliers && outliers.count > 0) {
        console.warn(`${field} has ${outliers.count} outliers`);
      }
    }
  }}
/>
Tier Restrictions
| Feature | Free | Pro | Scale | Enterprise |
|---|---|---|---|---|
| Basic profiling | Yes | Yes | Yes | Yes |
| Top values | 3 | 5 | 10 | 20 |
| Histograms | No | No | Yes | Yes |
| Outlier detection | No | No | Yes | Yes |
| Numeric stats | No | No | Yes | Yes |
| Category frequencies | No | No | Yes | Yes |