1- import sys
21from typing import List , Optional
32
43from open_data_contract_standard .model import OpenDataContractStandard , SchemaProperty
54
65from datacontract .export .exporter import Exporter
76
# Logical types (compared lower-cased) that are rendered as nested Protobuf messages.
# Immutable on purpose: several helpers share it for membership tests only.
OBJECT_TYPES: frozenset = frozenset({"object", "record", "struct"})
8+
89
class ProtoBufExporter(Exporter):
    """Exporter that renders a data contract as a Protobuf schema."""

    def export(self, data_contract, schema_name, server, sql_server_type, export_args) -> str:
        """Export the data contract to Protobuf format.

        Only ``data_contract`` is read; ``schema_name``, ``server``,
        ``sql_server_type`` and ``export_args`` are accepted but not used by
        this implementation.

        Returns:
            The result of ``to_protobuf(data_contract)``, which is declared
            to return ``str``.
        """
        return to_protobuf(data_contract)
1415
1516
1617def _get_config_value (prop : SchemaProperty , key : str ) -> Optional [str ]:
17- """Get a custom property value."""
18+ """Get a custom property value from customProperties ."""
1819 if prop .customProperties is None :
1920 return None
2021 for cp in prop .customProperties :
@@ -57,10 +58,10 @@ def to_protobuf(data_contract: OpenDataContractStandard) -> str:
5758
5859 # Build header with syntax and package declarations.
5960 header = 'syntax = "proto3";\n \n '
60- package = "example" # Default package
61+ package = "example" # Default package, can be customized
6162 header += f"package { package } ;\n \n "
6263
63- # Append enum definitions.
64+ # Append enum definitions before messages .
6465 for enum_name , enum_values in enum_definitions .items ():
6566 header += f"// Enum for { enum_name } \n "
6667 header += f"enum { enum_name } {{\n "
@@ -72,6 +73,7 @@ def to_protobuf(data_contract: OpenDataContractStandard) -> str:
7273 else :
7374 header += f" // Warning: Enum values for { enum_name } are not a dictionary\n "
7475 header += "}\n \n "
76+
7577 return header + messages
7678
7779
@@ -86,11 +88,12 @@ def _is_enum_field(prop: SchemaProperty) -> bool:
def _get_enum_name(prop: SchemaProperty) -> str:
    """
    Returns the enum type name for a field, formatted in UpperCamelCase.

    The "enum_name" custom property is used when it is set and non-empty;
    otherwise the name is derived from the field name.
    """
    enum_name = _get_config_value(prop, "enum_name")
    return _snake_to_upper_camel(enum_name or prop.name)
9497
9598
9699def _get_enum_values (prop : SchemaProperty ) -> dict :
@@ -103,69 +106,126 @@ def _get_enum_values(prop: SchemaProperty) -> dict:
103106 return {}
104107
105108
106- def _to_protobuf_message_name (name : str ) -> str :
109+ def _snake_to_upper_camel (name : str ) -> str :
107110 """
108- Returns a valid Protobuf message/enum name by capitalizing the first letter.
111+ Convert snake_case to UpperCamelCase.
112+ Preserves existing capitalization in parts.
113+
114+ Examples:
115+ "fsa_room" -> "FsaRoom"
116+ "FsaRegister" -> "FsaRegister" (already in UpperCamelCase)
117+ "simple_obj" -> "SimpleObj"
109118 """
110- return name [0 ].upper () + name [1 :] if name else name
119+ if not name :
120+ return name
111121
122+ # If already UpperCamelCase (first letter uppercase, no underscores after first word)
123+ if name and name [0 ].isupper () and "_" not in name :
124+ return name
112125
113- def to_protobuf_message (
114- model_name : str , properties : List [SchemaProperty ], description : str , indent_level : int = 0
115- ) -> str :
126+ parts = name .split ("_" )
127+ # Capitalize each part while preserving internal capitalization
128+ return "" .join (part [0 ].upper () + part [1 :] if part else "" for part in parts )
129+
130+
def _get_type_name(prop: SchemaProperty) -> str:
    """
    Get the message/enum type name for a property in UpperCamelCase.
    Used for message declarations and field type references.

    Resolution order:
      1. enum fields use their enum name;
      2. arrays of objects use ``items.name`` when it is set;
      3. everything else is named after the property itself.
    """
    if _is_enum_field(prop):
        return _get_enum_name(prop)

    is_object_array = (
        (prop.logicalType or "").lower() == "array"
        and prop.items
        and (prop.items.logicalType or "").lower() in OBJECT_TYPES
    )
    if is_object_array:
        # An explicit items.name wins and is normalized like any other message name.
        items_name = getattr(prop.items, "name", None)
        if items_name:
            return _snake_to_upper_camel(items_name)

    return _snake_to_upper_camel(prop.name)
141161
142162
def _should_create_nested_message(prop: SchemaProperty) -> bool:
    """
    Check whether a nested message must be generated for this property.
    Returns True for objects and for arrays whose items are objects.
    """
    logical_type = (prop.logicalType or "").lower()

    # Regular object
    if logical_type in OBJECT_TYPES:
        return True

    # Array of objects
    if logical_type == "array" and prop.items:
        return (prop.items.logicalType or "").lower() in OBJECT_TYPES

    return False
156183
157184
def _get_nested_properties(prop: SchemaProperty) -> Optional[List[SchemaProperty]]:
    """
    Get the properties that make up the nested message for a property.

    Returns the property's own ``properties`` for objects and the ``items``
    properties for arrays of objects (an empty list when none are set).
    Returns None when the property does not describe a nested message.
    """
    logical_type = (prop.logicalType or "").lower()

    if logical_type in OBJECT_TYPES:
        return prop.properties or []

    if logical_type == "array" and prop.items and (prop.items.logicalType or "").lower() in OBJECT_TYPES:
        return prop.items.properties or []

    return None
203+
168204
def _get_nested_description(prop: SchemaProperty) -> str:
    """
    Get the description to attach to the nested message of a property.

    Objects use their own description, arrays of objects use the description
    of ``items``. An empty string is returned when there is no description or
    the property does not describe a nested message.
    """
    logical_type = (prop.logicalType or "").lower()

    if logical_type in OBJECT_TYPES:
        return prop.description or ""

    if logical_type == "array" and prop.items and (prop.items.logicalType or "").lower() in OBJECT_TYPES:
        return prop.items.description or ""

    return ""
222+
223+
224+ def _get_primitive_type (prop : SchemaProperty ) -> str :
225+ """
226+ Get Protobuf type for primitive fields.
227+ Handles recursive type resolution for arrays of primitives.
228+ """
169229 field_type = prop .logicalType or ""
170230 lower_type = field_type .lower ()
171231
@@ -185,19 +245,112 @@ def _convert_type(prop: SchemaProperty) -> str:
185245 return "bool"
186246 if lower_type in ["bytes" ]:
187247 return "bytes"
188- if lower_type in ["object" , "record" , "struct" ]:
189- return _to_protobuf_message_name (prop .name )
248+
249+ # Recursive handling for arrays of primitives
250+ if lower_type == "array" and prop .items :
251+ return _get_primitive_type (prop .items )
252+
253+ return "string" # Fallback for unrecognized types
254+
255+
def _get_field_type(prop: SchemaProperty) -> str:
    """
    Get the Protobuf type for a field (string, int32, repeated TypeName, etc).
    Arrays are rendered as ``repeated <element type>``.
    """
    logical_type = (prop.logicalType or "").lower()

    # Handle arrays
    if logical_type == "array":
        if not prop.items:
            return "repeated string"  # Default array type when items are not described
        if (prop.items.logicalType or "").lower() in OBJECT_TYPES:
            # Element type is a nested message, e.g. FsaRoom
            return f"repeated {_get_type_name(prop)}"
        # NOTE(review): element types are resolved as primitives only, so an enum
        # declared on the items is not considered here - confirm this is intended.
        return f"repeated {_get_primitive_type(prop.items)}"

    # Handle regular objects, e.g. SimpleObj
    if logical_type in OBJECT_TYPES:
        return _get_type_name(prop)

    # Handle enums
    if _is_enum_field(prop):
        return _get_enum_name(prop)

    # Handle primitive types
    return _get_primitive_type(prop)
292+
293+
def to_protobuf_message(
    model_name: str, properties: List[SchemaProperty], description: str, indent_level: int = 0
) -> str:
    """
    Generates a Protobuf message definition from the model's fields.
    Handles nested messages for complex types recursively.

    Args:
        model_name: Name of the message; converted to UpperCamelCase.
        properties: Fields of the message, numbered from 1 in list order.
        description: Optional comment emitted above the message.
        indent_level: Nesting depth used for indentation.

    Returns:
        The message definition as text, ending with a newline.
    """
    outer = indent(indent_level)
    inner = indent(indent_level + 1)

    chunks = []
    if description:
        chunks.append(f"{outer}// {description}\n")

    # Message name always in UpperCamelCase
    chunks.append(f"{outer}message {_snake_to_upper_camel(model_name)} {{\n")

    # Phase 1: emit all nested messages before the fields that reference them
    for prop in properties:
        if not _should_create_nested_message(prop):
            continue
        nested_props = _get_nested_properties(prop)
        if nested_props is None:
            continue
        nested_message = to_protobuf_message(
            _get_type_name(prop), nested_props, _get_nested_description(prop), indent_level + 1
        )
        chunks.append(nested_message + "\n")

    # Phase 2: emit all fields; field names are kept exactly as given
    for number, prop in enumerate(properties, start=1):
        if prop.description:
            chunks.append(f"{inner}// {prop.description}\n")
        chunks.append(f"{inner}{_get_field_declaration(prop)} {prop.name} = {number};\n")

    chunks.append(f"{outer}}}\n")
    return "".join(chunks)
336+
337+
def indent(indent_level: int) -> str:
    """Generate the indentation string for the given nesting level."""
    # NOTE(review): the indentation unit is reproduced as it appears in the
    # reviewed change; confirm its width against the repository file.
    return " " * indent_level
341+
342+
def _get_field_declaration(prop: SchemaProperty) -> str:
    """
    Returns the field's type declaration, prefixed with ``optional`` if needed.

    ``optional`` is added only when ``required`` is explicitly False and the
    field is neither an array nor a message type (i.e. scalars and enums).
    """
    field_type = _get_field_type(prop)  # already includes "repeated" for arrays

    logical_type = (prop.logicalType or "").lower()
    if logical_type == "array" or logical_type in OBJECT_TYPES:
        return field_type

    # Only an explicit False counts; a missing or None "required" adds nothing.
    if getattr(prop, "required", None) is False:
        return f"optional {field_type}"
    return field_type
0 commit comments