{"openapi":"3.0.0","info":{"title":"Struct PDF - OpenAPI Documentation","version":"1.0.0","description":"Extract structured data from documents (PDF or images: PNG, JPEG, HEIC) using a schema. Authenticate with an API key via the `Authorization: Bearer <key>` or `X-API-Key` header."},"servers":[{"url":"https://api.structpdf.com","description":"Struct PDF API"}],"components":{"securitySchemes":{"bearerAuth":{"type":"http","scheme":"bearer","bearerFormat":"API Key"},"apiKeyAuth":{"type":"apiKey","in":"header","name":"X-API-Key"}},"schemas":{"ExtractSuccess":{"type":"string","enum":["Complete","Partial","Fail"],"description":"Complete = all fields found; Partial = some missing or conflicting; Fail = extraction failed"},"Finding":{"type":"object","properties":{"schema_key":{"type":"string","description":"Dot-notation path of the extracted field (e.g. field, nested.field, items.0.name). Use this to map the value into the nested or array result. Array indices are numeric segments."},"value":{"nullable":true,"description":"Extracted value (type from schema)"},"page":{"type":"number","description":"1-based page number where the value was found"},"document_snippet":{"type":"string","maxLength":250,"description":"Short excerpt where the value was found (max 250 characters)"}},"required":["schema_key","page","document_snippet"]},"FieldError":{"type":"object","properties":{"schema_key":{"type":"string","description":"Dot-notation path that had an error (e.g. missing value or multiple conflicting values)"},"error_details":{"type":"string","description":"Error type: e.g. \"Value not found\", \"Multiple values found\""},"pages":{"type":"array","items":{"type":"number"},"description":"1-based page numbers relevant to the error"}},"required":["schema_key","error_details","pages"]},"ExtractMetadata":{"type":"object","properties":{"findings":{"type":"array","items":{"$ref":"#/components/schemas/Finding"}},"errors":{"type":"array","items":{"$ref":"#/components/schemas/FieldError"}}},"required":["findings","errors"],"description":"Findings (per-field with page and snippet) and per-field errors (e.g. value not found, multiple values)"},"ExtractResponse":{"type":"object","properties":{"generationId":{"type":"string","format":"uuid","description":"Unique extraction request UUID.","example":"550e8400-e29b-41d4-a716-446655440000"},"success":{"$ref":"#/components/schemas/ExtractSuccess"},"result":{"type":"object","additionalProperties":{"nullable":true},"description":"Extracted data nested by schema (dot-notation keys become nested objects; array indices become array elements)","example":{}},"metadata":{"$ref":"#/components/schemas/ExtractMetadata"}},"required":["generationId","success","result","metadata"]},"ExtractRequest":{"type":"object","properties":{"schema":{"type":"string","description":"Schema JSON in [Zod-based](https://zod.dev/) structured format. You can build it in the [Schema Builder](https://structpdf.com/schema).\n\nRequired when `schema_id` is not provided. If both `schema` and `schema_id` are sent, Struct PDF uses `schema_id` first and falls back to `schema` when needed.\n\nSupported shapes: flat fields, one level of nested objects, and arrays of one-level objects. Deeper nesting returns 400."},"schema_id":{"type":"string","format":"uuid","description":"Saved schema ID (UUID). Use this instead of sending `schema` inline when you want to reference an existing schema.\n\nIf both `schema_id` and `schema` are sent, Struct PDF uses `schema_id` first and falls back to `schema` when needed."},"file":{"type":"string","description":"Document file to extract data from (PDF, PNG, JPEG, or HEIC). Max 50MB.","format":"binary"}},"required":["file"]}},"parameters":{}},"paths":{"/v1/extract":{"post":{"summary":"Extract","description":"Extract structured data from a document (PDF, PNG, JPEG, or HEIC) using a schema. Provide `schema` (JSON) or `schema_id` (UUID of a saved schema).\n\nThe `schema` payload uses [Zod-based](https://zod.dev) structured schema format. You can build it in the [Schema Builder](https://structpdf.com/schema).\n\n**Client Timeout:** Latency depends on the complexity of extraction. The recommended client timeout is 120 seconds; most extractions complete much faster.","security":[{"bearerAuth":[]},{"apiKeyAuth":[]}],"requestBody":{"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/ExtractRequest"}}}},"responses":{"200":{"description":"Extraction result. `result` matches the schema shape; `metadata.findings` and `metadata.errors` use dot-notation.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ExtractResponse"},"example":{"generationId":"ae25e72f-d811-40db-9d33-5923dff25487","success":"Complete","result":{"guest_count":4,"tax":17.74,"total":239.78,"tip":40.04,"subtotal":182},"metadata":{"findings":[{"schema_key":"guest_count","value":4,"page":1,"document_snippet":"Guest Count: 4"},{"schema_key":"tax","value":17.74,"page":1,"document_snippet":"Tax .. $17.74"},{"schema_key":"total","value":239.78,"page":1,"document_snippet":"Total .. $239.78"},{"schema_key":"tip","value":40.04,"page":1,"document_snippet":"Tip .. $40.04"},{"schema_key":"subtotal","value":182,"page":1,"document_snippet":"Subtotal .. $182.00"}],"errors":[]}}}}},"400":{"description":"Invalid request: missing or invalid schema input, unknown schema_id, schema too deep (response body lists offending paths), missing or empty file, file exceeds 50MB, or unsupported file type."},"401":{"description":"Missing or invalid API key"},"422":{"description":"Extraction could not be completed for this document."},"429":{"description":"Rate limit exceeded. Too many requests from this IP. Retry after the delay indicated by the Retry-After header."}}}}}}