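The SortingWriter writes corrupted data for String and FixedLenByteArray columns whenever a meaningfully large number of rows is written. I think this bug has been present since the SortingWriter was introduced. The new tests in the sortingwriter-corruption branch (PR #24) show two failure modes. Sometimes the mismatched row is intact but misplaced, i.e. it is found at another index in the expected rows ("we found the row at index 106 in want"),
and sometimes it is corrupted data that matches no row at all ("got row index 43 isn't found in want rows, and is therefore corrupted data"). In the String output below, for example, the value got at index 43 is the tail of the value got at index 42 followed by the tail of the expected value for index 43. Test output: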
=== RUN TestSortingWriterCorruptedString
sorting_test.go:239: rows mismatch at index 42 :
sorting_test.go:240: want: parquet_test.Row{Tag:"NOjZhG1MpXf4naQFRqE25pCr4EszfVExuT9BGf4znMjAl82X081NXl51t7hYFh1ESB9HGrgtGns949cECbwr0WFcOQ7hQii7s418"}
sorting_test.go:241: got: parquet_test.Row{Tag:"zipEs765yzHuaW9s7YAXu23ORm8DgLpmlJeZLy7l4z5yBd4AqtXLfgnjiOarORSiywx8yuzgZwJBmwLu0XIjB1IOmkkMJdtgE91L"}
sorting_test.go:247: we found the row at index 106 in want.
sorting_test.go:239: rows mismatch at index 43 :
sorting_test.go:240: want: parquet_test.Row{Tag:"ONxgPm4crYY7e5So1q5PJBZmoP6edxQHM5Qcb9iPmPBb1EPejC2gbNTCw3VYO1v4tDxey9OF4Dya6VdeuAVkyNG8xSmIXeLnHWDs"}
sorting_test.go:241: got: parquet_test.Row{Tag:"lJeZLy7l4z5yBd4AqtXLfgnjiOarORSiywx8yuzgZwJBmwLu0XIjB1IOmkkMJdtgE91Ly9OF4Dya6VdeuAVkyNG8xSmIXeLnHWDs"}
sorting_test.go:253: got row index 43 isn't found in want rows, and is therefore corrupted data.
sorting_test.go:259: 2 rows mismatched out of 107 total
--- FAIL: TestSortingWriterCorruptedString (0.12s)
=== RUN TestSortingWriterCorruptedFixedLenByteArray
sorting_test.go:239: rows mismatch at index 168 :
sorting_test.go:240: want: parquet_test.Row{ID:[16]uint8{0x43, 0x63, 0x76, 0x6c, 0x6a, 0x70, 0x4d, 0x51, 0x76, 0x79, 0x71, 0x6b, 0x73, 0x32, 0x6c, 0x4d}}
sorting_test.go:241: got: parquet_test.Row{ID:[16]uint8{0x53, 0x75, 0x7a, 0x77, 0x6b, 0x58, 0x44, 0x44, 0x76, 0x77, 0x32, 0x4d, 0x67, 0x4a, 0x77, 0x4a}}
sorting_test.go:247: we found the row at index 360 in want.
sorting_test.go:239: rows mismatch at index 169 :
sorting_test.go:240: want: parquet_test.Row{ID:[16]uint8{0x43, 0x6c, 0x6b, 0x70, 0x77, 0x32, 0x56, 0x38, 0x6c, 0x33, 0x4e, 0x6f, 0x55, 0x50, 0x70, 0x53}}
sorting_test.go:241: got: parquet_test.Row{ID:[16]uint8{0x54, 0x31, 0x46, 0x6a, 0x35, 0x66, 0x59, 0x35, 0x56, 0x79, 0x58, 0x48, 0x45, 0x33, 0x6c, 0x41}}
sorting_test.go:247: we found the row at index 361 in want.
sorting_test.go:239: rows mismatch at index 170 :
sorting_test.go:240: want: parquet_test.Row{ID:[16]uint8{0x43, 0x70, 0x53, 0x78, 0x35, 0x50, 0x4b, 0x34, 0x4f, 0x64, 0x6d, 0x36, 0x6f, 0x74, 0x30, 0x77}}
sorting_test.go:241: got: parquet_test.Row{ID:[16]uint8{0x54, 0x32, 0x6c, 0x66, 0x61, 0x42, 0x74, 0x37, 0x56, 0x71, 0x47, 0x74, 0x55, 0x42, 0x37, 0x70}}
sorting_test.go:247: we found the row at index 362 in want.
sorting_test.go:239: rows mismatch at index 171 :
sorting_test.go:240: want: parquet_test.Row{ID:[16]uint8{0x43, 0x7a, 0x64, 0x68, 0x70, 0x38, 0x47, 0x5a, 0x47, 0x58, 0x6d, 0x4e, 0x68, 0x43, 0x4c, 0x52}}
sorting_test.go:241: got: parquet_test.Row{ID:[16]uint8{0x54, 0x46, 0x4d, 0x55, 0x50, 0x5a, 0x4c, 0x6d, 0x62, 0x36, 0x6c, 0x57, 0x52, 0x7a, 0x6d, 0x4c}}
sorting_test.go:247: we found the row at index 363 in want.
sorting_test.go:239: rows mismatch at index 172 :
sorting_test.go:240: want: parquet_test.Row{ID:[16]uint8{0x44, 0x39, 0x4e, 0x6d, 0x79, 0x36, 0x47, 0x71, 0x6a, 0x76, 0x6b, 0x32, 0x4b, 0x6e, 0x30, 0x78}}
sorting_test.go:241: got: parquet_test.Row{ID:[16]uint8{0x54, 0x4a, 0x47, 0x49, 0x72, 0x4b, 0x6d, 0x4e, 0x48, 0x50, 0x57, 0x63, 0x4b, 0x31, 0x78, 0x77}}
sorting_test.go:247: we found the row at index 364 in want.
sorting_test.go:239: rows mismatch at index 173 :
sorting_test.go:240: want: parquet_test.Row{ID:[16]uint8{0x44, 0x43, 0x79, 0x52, 0x46, 0x4b, 0x6c, 0x34, 0x5a, 0x79, 0x61, 0x71, 0x50, 0x6d, 0x6e, 0x34}}
sorting_test.go:241: got: parquet_test.Row{ID:[16]uint8{0x54, 0x4b, 0x42, 0x47, 0x78, 0x6e, 0x64, 0x69, 0x49, 0x31, 0x38, 0x4d, 0x77, 0x67, 0x4c, 0x76}}
sorting_test.go:247: we found the row at index 365 in want.
sorting_test.go:239: rows mismatch at index 174 :
sorting_test.go:240: want: parquet_test.Row{ID:[16]uint8{0x44, 0x4b, 0x4d, 0x6f, 0x37, 0x4e, 0x79, 0x43, 0x43, 0x44, 0x65, 0x67, 0x62, 0x43, 0x30, 0x4c}}
sorting_test.go:241: got: parquet_test.Row{ID:[16]uint8{0x54, 0x4e, 0x33, 0x51, 0x48, 0x5a, 0x32, 0x73, 0x41, 0x47, 0x68, 0x79, 0x6e, 0x72, 0x63, 0x32}}
sorting_test.go:247: we found the row at index 366 in want.
sorting_test.go:239: rows mismatch at index 175 :
sorting_test.go:240: want: parquet_test.Row{ID:[16]uint8{0x44, 0x53, 0x6c, 0x51, 0x65, 0x65, 0x79, 0x39, 0x52, 0x59, 0x59, 0x75, 0x4c, 0x32, 0x63, 0x50}}
sorting_test.go:241: got: parquet_test.Row{ID:[16]uint8{0x54, 0x51, 0x7a, 0x71, 0x7a, 0x64, 0x4f, 0x4a, 0x5a, 0x37, 0x38, 0x51, 0x43, 0x45, 0x43, 0x36}}
sorting_test.go:247: we found the row at index 367 in want.
sorting_test.go:239: rows mismatch at index 176 :
sorting_test.go:240: want: parquet_test.Row{ID:[16]uint8{0x44, 0x5a, 0x4d, 0x38, 0x51, 0x75, 0x55, 0x58, 0x6a, 0x4d, 0x79, 0x58, 0x71, 0x67, 0x77, 0x4a}}
sorting_test.go:241: got: parquet_test.Row{ID:[16]uint8{0x54, 0x56, 0x39, 0x44, 0x63, 0x35, 0x71, 0x79, 0x6b, 0x4e, 0x70, 0x36, 0x54, 0x4a, 0x64, 0x48}}
sorting_test.go:247: we found the row at index 368 in want.
sorting_test.go:239: rows mismatch at index 177 :
sorting_test.go:240: want: parquet_test.Row{ID:[16]uint8{0x44, 0x66, 0x31, 0x53, 0x4d, 0x50, 0x39, 0x51, 0x50, 0x57, 0x35, 0x64, 0x4b, 0x4f, 0x47, 0x69}}
sorting_test.go:241: got: parquet_test.Row{ID:[16]uint8{0x54, 0x61, 0x67, 0x7a, 0x41, 0x49, 0x33, 0x41, 0x56, 0x73, 0x45, 0x6c, 0x41, 0x73, 0x36, 0x69}}
sorting_test.go:247: we found the row at index 369 in want.
sorting_test.go:239: rows mismatch at index 178 :
sorting_test.go:240: want: parquet_test.Row{ID:[16]uint8{0x44, 0x66, 0x41, 0x75, 0x69, 0x48, 0x76, 0x77, 0x56, 0x69, 0x52, 0x43, 0x56, 0x56, 0x77, 0x45}}
sorting_test.go:241: got: parquet_test.Row{ID:[16]uint8{0x54, 0x62, 0x64, 0x35, 0x5a, 0x5a, 0x65, 0x72, 0x73, 0x64, 0x44, 0x30, 0x58, 0x6d, 0x31, 0x35}}
sorting_test.go:247: we found the row at index 370 in want.
sorting_test.go:239: rows mismatch at index 179 :
sorting_test.go:240: want: parquet_test.Row{ID:[16]uint8{0x44, 0x6b, 0x30, 0x47, 0x47, 0x49, 0x5a, 0x57, 0x7a, 0x73, 0x7a, 0x5a, 0x34, 0x57, 0x72, 0x47}}
sorting_test.go:241: got: parquet_test.Row{ID:[16]uint8{0x54, 0x6d, 0x6c, 0x69, 0x55, 0x70, 0x49, 0x30, 0x74, 0x6a, 0x50, 0x64, 0x47, 0x6a, 0x4a, 0x35}}
sorting_test.go:247: we found the row at index 371 in want.
sorting_test.go:239: rows mismatch at index 180 :
sorting_test.go:240: want: parquet_test.Row{ID:[16]uint8{0x44, 0x6d, 0x4b, 0x36, 0x43, 0x77, 0x41, 0x6e, 0x6f, 0x6f, 0x4e, 0x32, 0x51, 0x4a, 0x38, 0x71}}
sorting_test.go:241: got: parquet_test.Row{ID:[16]uint8{0x54, 0x6e, 0x38, 0x35, 0x6c, 0x44, 0x6b, 0x68, 0x62, 0x67, 0x6a, 0x70, 0x69, 0x6a, 0x37, 0x61}}
sorting_test.go:247: we found the row at index 372 in want.
sorting_test.go:239: rows mismatch at index 181 :
sorting_test.go:240: want: parquet_test.Row{ID:[16]uint8{0x44, 0x74, 0x39, 0x59, 0x74, 0x6b, 0x41, 0x70, 0x4e, 0x6f, 0x36, 0x33, 0x57, 0x72, 0x69, 0x4f}}
sorting_test.go:241: got: parquet_test.Row{ID:[16]uint8{0x55, 0x33, 0x58, 0x45, 0x6e, 0x43, 0x57, 0x77, 0x51, 0x30, 0x4f, 0x52, 0x48, 0x42, 0x70, 0x51}}
sorting_test.go:247: we found the row at index 373 in want.
sorting_test.go:239: rows mismatch at index 182 :
sorting_test.go:240: want: parquet_test.Row{ID:[16]uint8{0x44, 0x76, 0x47, 0x4f, 0x54, 0x66, 0x62, 0x58, 0x4f, 0x79, 0x53, 0x65, 0x63, 0x47, 0x56, 0x62}}
sorting_test.go:241: got: parquet_test.Row{ID:[16]uint8{0x55, 0x34, 0x57, 0x71, 0x77, 0x41, 0x67, 0x33, 0x38, 0x4e, 0x50, 0x70, 0x61, 0x75, 0x6e, 0x52}}
sorting_test.go:247: we found the row at index 374 in want.
sorting_test.go:239: rows mismatch at index 183 :
sorting_test.go:240: want: parquet_test.Row{ID:[16]uint8{0x44, 0x78, 0x6f, 0x39, 0x71, 0x74, 0x36, 0x57, 0x37, 0x48, 0x6e, 0x49, 0x64, 0x62, 0x71, 0x74}}
sorting_test.go:241: got: parquet_test.Row{ID:[16]uint8{0x55, 0x38, 0x71, 0x6d, 0x76, 0x77, 0x33, 0x36, 0x69, 0x6d, 0x38, 0x70, 0x45, 0x44, 0x4c, 0x48}}
sorting_test.go:247: we found the row at index 375 in want.
sorting_test.go:239: rows mismatch at index 184 :
sorting_test.go:240: want: parquet_test.Row{ID:[16]uint8{0x45, 0x38, 0x55, 0x4a, 0x71, 0x69, 0x70, 0x63, 0x58, 0x54, 0x46, 0x52, 0x4b, 0x73, 0x39, 0x65}}
sorting_test.go:241: got: parquet_test.Row{ID:[16]uint8{0x55, 0x4a, 0x44, 0x6e, 0x58, 0x38, 0x4a, 0x58, 0x59, 0x38, 0x61, 0x43, 0x6a, 0x50, 0x63, 0x4b}}
sorting_test.go:247: we found the row at index 376 in want.
sorting_test.go:239: rows mismatch at index 185 :
sorting_test.go:240: want: parquet_test.Row{ID:[16]uint8{0x45, 0x44, 0x4a, 0x30, 0x36, 0x4e, 0x70, 0x46, 0x4c, 0x67, 0x68, 0x66, 0x4e, 0x62, 0x66, 0x4b}}
sorting_test.go:241: got: parquet_test.Row{ID:[16]uint8{0x55, 0x52, 0x69, 0x66, 0x43, 0x68, 0x52, 0x72, 0x39, 0x53, 0x42, 0x67, 0x7a, 0x4f, 0x63, 0x56}}
sorting_test.go:247: we found the row at index 377 in want.
sorting_test.go:239: rows mismatch at index 186 :
sorting_test.go:240: want: parquet_test.Row{ID:[16]uint8{0x45, 0x4c, 0x78, 0x4f, 0x54, 0x57, 0x4b, 0x59, 0x73, 0x44, 0x57, 0x63, 0x68, 0x34, 0x34, 0x32}}
sorting_test.go:241: got: parquet_test.Row{ID:[16]uint8{0x55, 0x57, 0x36, 0x76, 0x37, 0x76, 0x4b, 0x6e, 0x69, 0x68, 0x54, 0x46, 0x48, 0x59, 0x41, 0x73}}
sorting_test.go:247: we found the row at index 378 in want.
sorting_test.go:239: rows mismatch at index 187 :
sorting_test.go:240: want: parquet_test.Row{ID:[16]uint8{0x45, 0x4d, 0x6b, 0x47, 0x4b, 0x6a, 0x69, 0x30, 0x58, 0x68, 0x38, 0x6e, 0x6c, 0x6e, 0x79, 0x42}}
sorting_test.go:241: got: parquet_test.Row{ID:[16]uint8{0x55, 0x58, 0x65, 0x4f, 0x56, 0x44, 0x6f, 0x72, 0x32, 0x78, 0x70, 0x65, 0x57, 0x56, 0x49, 0x51}}
sorting_test.go:247: we found the row at index 379 in want.
sorting_test.go:239: rows mismatch at index 188 :
sorting_test.go:240: want: parquet_test.Row{ID:[16]uint8{0x45, 0x4e, 0x54, 0x71, 0x63, 0x37, 0x4e, 0x78, 0x47, 0x76, 0x63, 0x54, 0x5a, 0x59, 0x49, 0x64}}
sorting_test.go:241: got: parquet_test.Row{ID:[16]uint8{0x55, 0x64, 0x5a, 0x7a, 0x55, 0x6b, 0x55, 0x33, 0x41, 0x4b, 0x43, 0x33, 0x4c, 0x34, 0x6c, 0x4b}}
sorting_test.go:247: we found the row at index 380 in want.
sorting_test.go:239: rows mismatch at index 189 :
sorting_test.go:240: want: parquet_test.Row{ID:[16]uint8{0x45, 0x70, 0x6f, 0x69, 0x4f, 0x7a, 0x34, 0x77, 0x47, 0x4e, 0x68, 0x63, 0x78, 0x56, 0x72, 0x70}}
sorting_test.go:241: got: parquet_test.Row{ID:[16]uint8{0x55, 0x65, 0x31, 0x4b, 0x69, 0x36, 0x55, 0x4d, 0x6e, 0x51, 0x75, 0x53, 0x53, 0x79, 0x44, 0x37}}
sorting_test.go:247: we found the row at index 381 in want.
sorting_test.go:239: rows mismatch at index 190 :
sorting_test.go:240: want: parquet_test.Row{ID:[16]uint8{0x45, 0x71, 0x6c, 0x77, 0x67, 0x56, 0x32, 0x73, 0x42, 0x34, 0x62, 0x43, 0x44, 0x4c, 0x30, 0x51}}
sorting_test.go:241: got: parquet_test.Row{ID:[16]uint8{0x55, 0x72, 0x58, 0x32, 0x56, 0x73, 0x63, 0x71, 0x49, 0x37, 0x69, 0x48, 0x53, 0x33, 0x72, 0x64}}
sorting_test.go:247: we found the row at index 382 in want.
sorting_test.go:239: rows mismatch at index 191 :
sorting_test.go:240: want: parquet_test.Row{ID:[16]uint8{0x45, 0x74, 0x54, 0x4e, 0x63, 0x4e, 0x78, 0x54, 0x4b, 0x7a, 0x38, 0x6c, 0x77, 0x65, 0x57, 0x6e}}
sorting_test.go:241: got: parquet_test.Row{ID:[16]uint8{0x56, 0x32, 0x62, 0x54, 0x39, 0x72, 0x76, 0x73, 0x62, 0x45, 0x63, 0x72, 0x57, 0x48, 0x5a, 0x36}}
sorting_test.go:247: we found the row at index 383 in want.
sorting_test.go:239: rows mismatch at index 378 :
sorting_test.go:240: want: parquet_test.Row{ID:[16]uint8{0x55, 0x57, 0x36, 0x76, 0x37, 0x76, 0x4b, 0x6e, 0x69, 0x68, 0x54, 0x46, 0x48, 0x59, 0x41, 0x73}}
sorting_test.go:241: got: parquet_test.Row{ID:[16]uint8{0x6d, 0x51, 0x74, 0x55, 0x37, 0x54, 0x48, 0x47, 0x69, 0x74, 0x67, 0x57, 0x4d, 0x7a, 0x31, 0x37}}
sorting_test.go:247: we found the row at index 570 in want.
sorting_test.go:239: rows mismatch at index 379 :
sorting_test.go:240: want: parquet_test.Row{ID:[16]uint8{0x55, 0x58, 0x65, 0x4f, 0x56, 0x44, 0x6f, 0x72, 0x32, 0x78, 0x70, 0x65, 0x57, 0x56, 0x49, 0x51}}
sorting_test.go:241: got: parquet_test.Row{ID:[16]uint8{0x6d, 0x53, 0x32, 0x59, 0x61, 0x5a, 0x67, 0x6c, 0x53, 0x72, 0x50, 0x57, 0x37, 0x45, 0x67, 0x79}}
sorting_test.go:247: we found the row at index 571 in want.
sorting_test.go:239: rows mismatch at index 380 :
sorting_test.go:240: want: parquet_test.Row{ID:[16]uint8{0x55, 0x64, 0x5a, 0x7a, 0x55, 0x6b, 0x55, 0x33, 0x41, 0x4b, 0x43, 0x33, 0x4c, 0x34, 0x6c, 0x4b}}
sorting_test.go:241: got: parquet_test.Row{ID:[16]uint8{0x6d, 0x5a, 0x73, 0x4a, 0x75, 0x41, 0x38, 0x70, 0x49, 0x6b, 0x4f, 0x47, 0x7a, 0x67, 0x77, 0x65}}
sorting_test.go:247: we found the row at index 572 in want.
sorting_test.go:239: rows mismatch at index 381 :
sorting_test.go:240: want: parquet_test.Row{ID:[16]uint8{0x55, 0x65, 0x31, 0x4b, 0x69, 0x36, 0x55, 0x4d, 0x6e, 0x51, 0x75, 0x53, 0x53, 0x79, 0x44, 0x37}}
sorting_test.go:241: got: parquet_test.Row{ID:[16]uint8{0x6d, 0x64, 0x39, 0x61, 0x45, 0x72, 0x44, 0x42, 0x49, 0x4f, 0x6a, 0x55, 0x70, 0x77, 0x6b, 0x43}}
sorting_test.go:247: we found the row at index 573 in want.
sorting_test.go:239: rows mismatch at index 382 :
sorting_test.go:240: want: parquet_test.Row{ID:[16]uint8{0x55, 0x72, 0x58, 0x32, 0x56, 0x73, 0x63, 0x71, 0x49, 0x37, 0x69, 0x48, 0x53, 0x33, 0x72, 0x64}}
sorting_test.go:241: got: parquet_test.Row{ID:[16]uint8{0x6d, 0x6f, 0x43, 0x74, 0x49, 0x32, 0x6f, 0x4c, 0x78, 0x48, 0x7a, 0x71, 0x61, 0x41, 0x41, 0x50}}
sorting_test.go:247: we found the row at index 574 in want.
sorting_test.go:239: rows mismatch at index 383 :
sorting_test.go:240: want: parquet_test.Row{ID:[16]uint8{0x56, 0x32, 0x62, 0x54, 0x39, 0x72, 0x76, 0x73, 0x62, 0x45, 0x63, 0x72, 0x57, 0x48, 0x5a, 0x36}}
sorting_test.go:241: got: parquet_test.Row{ID:[16]uint8{0x6d, 0x70, 0x36, 0x48, 0x47, 0x56, 0x51, 0x6d, 0x4c, 0x43, 0x55, 0x41, 0x49, 0x35, 0x33, 0x77}}
sorting_test.go:247: we found the row at index 575 in want.
sorting_test.go:259: 30 rows mismatched out of 700 total
--- FAIL: TestSortingWriterCorruptedFixedLenByteArray (0.00s)