{
"type": "object",
"title": "PII Image Redaction Plugin Configuration",
"$schema": "http://json-schema.org/draft-07/schema#",
"properties": {
"padding": {
"type": "number",
"title": "Padding",
"default": 10,
"description": "Padding in pixels around detected text to ensure complete redaction coverage."
},
"entities": {
"type": "array",
"items": {
"type": "string"
},
"title": "Entity Types",
"examples": [
[
"EMAIL_ADDRESS",
"PHONE_NUMBER",
"PERSON"
]
],
"description": "List of PII entity types to detect and redact in images. If not specified, all detected entities will be redacted."
},
"language": {
"type": "string",
"title": "Language",
"default": "en",
"examples": [
"en",
"es",
"de",
"fr"
],
"description": "Language code for OCR text analysis."
},
"deny_list": {
"type": "array",
"items": {
"type": "string"
},
"title": "Deny List",
"description": "List of terms/patterns that should always be redacted from images."
},
"allow_list": {
"type": "array",
"items": {
"type": "string"
},
"title": "Allow List",
"description": "List of terms/patterns that should not be redacted from images."
},
"fill_color": {
"oneOf": [
{
"type": "string",
"examples": [
"black",
"white",
"gray"
]
},
{
"type": "array",
"items": {
"type": "number",
"maximum": 255,
"minimum": 0
},
"examples": [
[
0,
0,
0
]
],
"maxItems": 3,
"minItems": 3
}
],
"title": "Fill Color",
"default": "black",
"description": "Fill color for redacted areas. Can be color name or RGB tuple [R, G, B] (0-255)"
},
"ocr_kwargs": {
"type": "object",
"title": "OCR Arguments",
"examples": [
{
"lang": "eng",
"config": "--psm 6"
}
],
"description": "Additional keyword arguments to pass to the OCR engine (Tesseract). See Tesseract documentation for available options.",
"additionalProperties": true
},
"score_threshold": {
"type": "number",
"title": "Score Threshold",
"default": 0.5,
"maximum": 1,
"minimum": 0,
"description": "Minimum confidence score (0-1) required to redact an entity."
},
"ad_hoc_recognizers": {
"type": "array",
"items": {
"type": "object",
"required": [
"name",
"supported_language",
"patterns",
"supported_entity"
],
"properties": {
"name": {
"type": "string",
"title": "Name",
"description": "Unique name for the recognizer"
},
"context": {
"type": "array",
"items": {
"type": "string"
},
"title": "Context Words",
"description": "Context words to improve detection"
},
"patterns": {
"type": "array",
"items": {
"type": "string"
},
"title": "Patterns",
"description": "Regex patterns to match"
},
"supported_entity": {
"type": "string",
"title": "Supported Entity",
"description": "Entity type this recognizer detects"
},
"supported_language": {
"type": "string",
"title": "Supported Language",
"description": "Language code this recognizer supports"
}
}
},
"title": "Custom Recognizers",
"examples": [
[
{
"name": "employee_id_recognizer",
"context": [
"employee",
"staff",
"id"
],
"patterns": [
"EMP-\\d{6}"
],
"supported_entity": "EMPLOYEE_ID",
"supported_language": "en"
}
]
],
"description": "Custom regex-based recognizers for detecting specific patterns in images."
}
},
"description": "Configuration for the Presidio-based image redaction plugin that detects and redacts PII from images using OCR."
}