log = logging .getLogger (__name__ )


# Sentinel used as a ``dict.get`` default so that a key which is absent can be
# told apart from a key whose stored value is ``None``.
_MISSING = object ()
24+
25+
2326class SchemaValidator :
2427 def __init__ (
2528 self ,
@@ -33,7 +36,8 @@ def __contains__(self, schema_format: str) -> bool:
3336 return schema_format in self .validator .format_checker .checkers
3437
3538 def validate (self , value : Any ) -> None :
36- errors_iter = self .validator .iter_errors (value )
39+ validation_value = self .get_binary_validation_value (value )
40+ errors_iter = self .validator .iter_errors (validation_value )
3741 errors = tuple (errors_iter )
3842 if errors :
3943 schema_type = (self .schema / "type" ).read_str_or_list ("any" )
@@ -93,6 +97,8 @@ def get_primitive_type(self, value: Any) -> Optional[str]:
9397 schema_types = sorted (self .validator .TYPE_CHECKER ._type_checkers )
9498 assert isinstance (schema_types , list )
9599 for schema_type in schema_types :
100+ if self .accepts_binary_string_value (schema_type , value ):
101+ return schema_type
96102 result = self .type_validator (value , type_override = schema_type )
97103 if not result :
98104 continue
@@ -104,6 +110,158 @@ def get_primitive_type(self, value: Any) -> Optional[str]:
104110 # OpenAPI 3.0: None is not a primitive type so None value will not find any type
105111 return None
106112
def accepts_binary_string_value(
    self, schema_type: Optional[str], value: Any
) -> bool:
    """Tell whether ``value`` is bytes permitted by a binary string schema.

    Returns ``True`` only when ``schema_type`` is ``"string"``, ``value`` is
    a ``bytes`` instance and the schema's ``format`` reads as ``"binary"``
    or ``"byte"``. The schema's ``format`` is only consulted once the first
    two conditions hold.
    """
    bytes_for_string_type = schema_type == "string" and isinstance(
        value, bytes
    )
    if not bytes_for_string_type:
        return False

    declared_format = (self.schema / "format").read_str(None)
    return declared_format in ("binary", "byte")
121+
def get_binary_validation_value(self, value: Any) -> Any:
    """Return the form of ``value`` that is handed to jsonschema.

    A bytes value accepted by this schema as a binary string is decoded to
    text. Otherwise each ``oneOf``/``anyOf``/``allOf`` subschema present in
    the schema normalizes the original ``value`` and the results are merged;
    dicts are then processed by the mapping helper, and lists by the array
    helper when the schema has ``items``. Anything else is returned as is.
    """
    # OpenAPI binary and byte string values are represented as bytes,
    # but jsonschema validates string schemas against text values.
    declared_type = (self.schema / "type").read_str(None)
    if self.accepts_binary_string_value(declared_type, value):
        return self.decode_binary_string_value(value)

    result = value
    for composition in ("oneOf", "anyOf", "allOf"):
        if composition in self.schema:
            for branch_schema in self.schema / composition:
                branch_validator = self.evolve(branch_schema)
                # Every branch sees the original value; its outcome is
                # folded into the running result.
                result = self.merge_binary_validation_value(
                    result,
                    branch_validator.get_binary_validation_value(value),
                )

    if isinstance(result, dict):
        return self.get_binary_validation_mapping_value(result)
    if isinstance(result, list) and "items" in self.schema:
        return self.get_binary_validation_array_value(result)
    return result
150+
def decode_binary_string_value(self, value: bytes) -> str:
    """Decode ``value`` to text without ever raising on invalid bytes.

    UTF-8 is tried first. If that fails, the bytes are decoded as ASCII with
    the ``surrogateescape`` handler, which maps each undecodable byte to a
    lone surrogate code point.
    """
    try:
        text = value.decode("utf-8")
    except UnicodeDecodeError:
        text = value.decode("ASCII", errors="surrogateescape")
    return text
156+
def get_binary_validation_mapping_value(self, value: Any) -> Any:
    """Normalize the entries of mapping ``value`` for validation.

    Entries named under the schema's ``properties`` are normalized with the
    matching property schema. When ``additionalProperties`` is itself a
    schema (neither ``True`` nor ``False``), the remaining entries are
    normalized with it. ``value`` is never mutated: it is returned unchanged
    when no entry changes, otherwise a shallow ``dict`` copy carrying the
    replacements is returned.
    """
    result = value

    def store(name: Any, replacement: Any) -> None:
        # Copy on first write so the caller's mapping stays untouched.
        nonlocal result
        if result is value:
            result = dict(value)
        result[name] = replacement

    has_properties = "properties" in self.schema

    if has_properties:
        for name, sub_schema in (self.schema / "properties").items():
            if name in value:
                current = value[name]
                converted = self.evolve(
                    sub_schema
                ).get_binary_validation_value(current)
                if converted is not current:
                    store(name, converted)

    additional_properties = self.schema.get("additionalProperties", True)
    if additional_properties in (True, False):
        return result

    if has_properties:
        declared_names = set((self.schema / "properties").keys())
    else:
        declared_names = set()

    extra_validator = self.evolve(self.schema / "additionalProperties")
    for name, current in value.items():
        if name not in declared_names:
            converted = extra_validator.get_binary_validation_value(current)
            if converted is not current:
                store(name, converted)

    return result
196+
def get_binary_validation_array_value(self, value: Any) -> Any:
    """Normalize every element of ``value`` with the ``items`` schema.

    Returns ``value`` itself when no element changes; otherwise returns a
    new list holding the normalized elements. ``value`` is never mutated.
    """
    item_validator = self.evolve(self.schema / "items")
    converted = [
        item_validator.get_binary_validation_value(element)
        for element in value
    ]

    # Identity, not equality, decides whether anything was rewritten.
    untouched = all(new is old for new, old in zip(converted, value))
    return value if untouched else converted
213+
def merge_binary_validation_value(
    self, value: Any, normalized_value: Any
) -> Any:
    """Fold one branch's normalized result into the accumulated value.

    Args:
        value: The result accumulated so far from earlier schema branches.
        normalized_value: The result produced by the current branch.

    Returns:
        ``value`` itself when the branch contributes nothing new, otherwise
        a merged value. Dicts and equal-length lists are merged element by
        element into shallow copies; the inputs are never mutated.
    """
    if normalized_value is value:
        return value

    if isinstance(value, dict) and isinstance(normalized_value, dict):
        merged_dict = value
        for key, normalized_item in normalized_value.items():
            item = value.get(key, _MISSING)
            if item is _MISSING:
                merged_item = normalized_item
            else:
                merged_item = self.merge_binary_validation_value(
                    item, normalized_item
                )
                if merged_item is item:
                    continue
            # Copy on first write so the accumulated dict stays untouched.
            if merged_dict is value:
                merged_dict = dict(value)
            merged_dict[key] = merged_item

        return merged_dict

    if isinstance(value, list) and isinstance(normalized_value, list):
        if len(value) != len(normalized_value):
            return normalized_value

        merged_list = None
        for idx, (item, normalized_item) in enumerate(
            zip(value, normalized_value)
        ):
            merged_item = self.merge_binary_validation_value(
                item, normalized_item
            )
            if merged_item is item:
                continue
            if merged_list is None:
                merged_list = list(value)
            merged_list[idx] = merged_item

        return value if merged_list is None else merged_list

    # Normalization only ever turns bytes into text. A bytes leaf coming
    # from a branch therefore means that branch left the value untouched,
    # and it must not undo the decoding done by an earlier branch;
    # otherwise the outcome depends on the order of the subschemas.
    if isinstance(value, str) and isinstance(normalized_value, bytes):
        return value

    return normalized_value
264+
107265 def iter_valid_schemas (self , value : Any ) -> Iterator [SchemaPath ]:
108266 yield self .schema
109267
0 commit comments