Skip to content

Commit 8ef4197

Browse files
committed
SAV reader: Fix issue with 757-byte variables
1 parent 039118f commit 8ef4197

File tree

3 files changed

+108
-14
lines changed

3 files changed

+108
-14
lines changed

src/spss/readstat_sav_parse.c

Lines changed: 41 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -731,14 +731,29 @@ readstat_error_t sav_parse_very_long_string_record(void *data, int count, sav_ct
731731
{
732732
varlookup_t *found = bsearch(temp_key, table, var_count, sizeof(varlookup_t), &compare_key_varlookup);
733733
if (found) {
734-
ctx->varinfo[found->index]->string_length = temp_val;
735-
ctx->varinfo[found->index]->write_format.width = temp_val;
736-
ctx->varinfo[found->index]->print_format.width = temp_val;
734+
// See logic above; we need to apply this to all matching variables since ghost variable
735+
// names may conflict with real variable names.
736+
varlookup_t *first_match = found, *last_match = found;
737+
varlookup_t *iter_match = found - 1;
738+
while (iter_match >= table && strcmp(iter_match->name, temp_key) == 0) {
739+
first_match = iter_match;
740+
iter_match--;
741+
}
742+
iter_match = found + 1;
743+
while (iter_match - table < var_count && strcmp(iter_match->name, temp_key) == 0) {
744+
last_match = iter_match;
745+
iter_match++;
746+
}
747+
for (iter_match=first_match; iter_match<=last_match; iter_match++) {
748+
ctx->varinfo[iter_match->index]->string_length = temp_val;
749+
ctx->varinfo[iter_match->index]->write_format.width = temp_val;
750+
ctx->varinfo[iter_match->index]->print_format.width = temp_val;
751+
}
737752
}
738753
}
739754
break;
740755
case 4:
741-
#line 202 "src/spss/readstat_sav_parse.rl"
756+
#line 217 "src/spss/readstat_sav_parse.rl"
742757
{
743758
if ((*p) != '\0') {
744759
unsigned char digit = (*p) - '0';
@@ -751,10 +766,10 @@ readstat_error_t sav_parse_very_long_string_record(void *data, int count, sav_ct
751766
}
752767
break;
753768
case 5:
754-
#line 213 "src/spss/readstat_sav_parse.rl"
769+
#line 228 "src/spss/readstat_sav_parse.rl"
755770
{ temp_val = 0; }
756771
break;
757-
#line 758 "src/spss/readstat_sav_parse.c"
772+
#line 773 "src/spss/readstat_sav_parse.c"
758773
}
759774
}
760775

@@ -775,21 +790,36 @@ readstat_error_t sav_parse_very_long_string_record(void *data, int count, sav_ct
775790
{
776791
varlookup_t *found = bsearch(temp_key, table, var_count, sizeof(varlookup_t), &compare_key_varlookup);
777792
if (found) {
778-
ctx->varinfo[found->index]->string_length = temp_val;
779-
ctx->varinfo[found->index]->write_format.width = temp_val;
780-
ctx->varinfo[found->index]->print_format.width = temp_val;
793+
// See logic above; we need to apply this to all matching variables since ghost variable
794+
// names may conflict with real variable names.
795+
varlookup_t *first_match = found, *last_match = found;
796+
varlookup_t *iter_match = found - 1;
797+
while (iter_match >= table && strcmp(iter_match->name, temp_key) == 0) {
798+
first_match = iter_match;
799+
iter_match--;
800+
}
801+
iter_match = found + 1;
802+
while (iter_match - table < var_count && strcmp(iter_match->name, temp_key) == 0) {
803+
last_match = iter_match;
804+
iter_match++;
805+
}
806+
for (iter_match=first_match; iter_match<=last_match; iter_match++) {
807+
ctx->varinfo[iter_match->index]->string_length = temp_val;
808+
ctx->varinfo[iter_match->index]->write_format.width = temp_val;
809+
ctx->varinfo[iter_match->index]->print_format.width = temp_val;
810+
}
781811
}
782812
}
783813
break;
784-
#line 785 "src/spss/readstat_sav_parse.c"
814+
#line 815 "src/spss/readstat_sav_parse.c"
785815
}
786816
}
787817
}
788818

789819
_out: {}
790820
}
791821

792-
#line 221 "src/spss/readstat_sav_parse.rl"
822+
#line 236 "src/spss/readstat_sav_parse.rl"
793823

794824

795825
if (cs < 11 || p != pe) {

src/spss/readstat_sav_parse.rl

Lines changed: 18 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -193,9 +193,24 @@ readstat_error_t sav_parse_very_long_string_record(void *data, int count, sav_ct
193193
action set_width {
194194
varlookup_t *found = bsearch(temp_key, table, var_count, sizeof(varlookup_t), &compare_key_varlookup);
195195
if (found) {
196-
ctx->varinfo[found->index]->string_length = temp_val;
197-
ctx->varinfo[found->index]->write_format.width = temp_val;
198-
ctx->varinfo[found->index]->print_format.width = temp_val;
196+
// See logic above; we need to apply this to all matching variables since ghost variable
197+
// names may conflict with real variable names.
198+
varlookup_t *first_match = found, *last_match = found;
199+
varlookup_t *iter_match = found - 1;
200+
while (iter_match >= table && strcmp(iter_match->name, temp_key) == 0) {
201+
first_match = iter_match;
202+
iter_match--;
203+
}
204+
iter_match = found + 1;
205+
while (iter_match - table < var_count && strcmp(iter_match->name, temp_key) == 0) {
206+
last_match = iter_match;
207+
iter_match++;
208+
}
209+
for (iter_match=first_match; iter_match<=last_match; iter_match++) {
210+
ctx->varinfo[iter_match->index]->string_length = temp_val;
211+
ctx->varinfo[iter_match->index]->write_format.width = temp_val;
212+
ctx->varinfo[iter_match->index]->print_format.width = temp_val;
213+
}
199214
}
200215
}
201216

src/test/test_list.h

Lines changed: 49 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -421,6 +421,55 @@ static rt_test_group_t _test_groups[] = {
421421
}
422422
}
423423
},
424+
{
425+
/* See: https://github.com/WizardMac/ReadStat/issues/260 */
426+
.label = "757-byte strings + double values in SAV",
427+
.test_formats = RT_FORMAT_SAV,
428+
.rows = 1,
429+
.columns = {
430+
{
431+
.name = "aaaaa2",
432+
.label = "x",
433+
.type = READSTAT_TYPE_STRING,
434+
.display_width = 757,
435+
.values = {
436+
{ .type = READSTAT_TYPE_STRING, .v =
437+
{ .string_value = /* 757 bytes long */
438+
"0123456789" "0123456789" "0123456789" "0123456789" "0123456789"
439+
"0123456789" "0123456789" "0123456789" "0123456789" "0123456789"
440+
441+
"0123456789" "0123456789" "0123456789" "0123456789" "0123456789"
442+
"0123456789" "0123456789" "0123456789" "0123456789" "0123456789"
443+
444+
"0123456789" "0123456789" "0123456789" "0123456789" "0123456789"
445+
"0123456789" "0123456789" "0123456789" "0123456789" "0123456789"
446+
447+
"0123456789" "0123456789" "0123456789" "0123456789" "0123456789"
448+
"0123456789" "0123456789" "0123456789" "0123456789" "0123456789"
449+
450+
"0123456789" "0123456789" "0123456789" "0123456789" "0123456789"
451+
"0123456789" "0123456789" "0123456789" "0123456789" "0123456789"
452+
453+
"0123456789" "0123456789" "0123456789" "0123456789" "0123456789"
454+
"0123456789" "0123456789" "0123456789" "0123456789" "0123456789"
455+
456+
"0123456789" "0123456789" "0123456789" "0123456789" "0123456789"
457+
"0123456789" "0123456789" "0123456789" "0123456789" "0123456789"
458+
459+
"0123456789" "0123456789" "0123456789" "0123456789" "0123456789"
460+
"0123456"
461+
}
462+
}
463+
}
464+
},
465+
{
466+
.name = "y",
467+
.label = "y",
468+
.type = READSTAT_TYPE_DOUBLE,
469+
.values = { { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = 1.0 } } },
470+
}
471+
}
472+
},
424473
{
425474
.label = "Four 1024-byte strings in SAS7BDAT", /* Test 4096+ byte rows */
426475
.test_formats = RT_FORMAT_SAS7BDAT,

0 commit comments

Comments
 (0)