Skip to content

Commit b5d6508

Browse files
committed
ext/uri: speed up Uri\Rfc3986\Uri component reads
Five related changes to uri_parser_rfc3986.c that together cut parse + 7 reads on a 17-URL mix from 0.842s to 0.653s (1.7M parses, pinned to a single CPU). That's a 22% wall-time reduction and a 29% throughput increase. Parse-only moves from 0.394s to 0.378s, about 4%. 1. get_normalized_uri() now aliases the raw URI when nothing requires normalization. uriNormalizeSyntaxMaskRequiredExA reports which components need rewriting; a zero mask means the parsed URI is already canonical and the code skips the uriCopyUriMmA deep copy plus the full uriNormalizeSyntaxExMmA pass. This is the biggest single contributor. The dirty mask is cached on the struct so multiple non-raw reads on the same instance run the scan once. 2. The port now lives in a cache on the uris struct. The parse path stashes the converted zend_long directly, so the first port_read serves it without re-scanning. Subsequent reads short-circuit. The write path invalidates the cache. 3. port_str_to_zend_long_checked replaces its stack-copy + ZEND_STRTOUL with an inline digit accumulator. Uriparser has already validated that the port text is ASCII digits only, so the branch-heavy strtoul path is unnecessary. 4. uriparser_create_uris uses emalloc + targeted field init instead of ecalloc. The struct is ~440 bytes. We overwrite the uri member right after this function returns and we only touch normalized_uri once the init flag becomes true, so only the flag fields and dirty_mask need zeroing. 5. php_uri_parser_rfc3986_destroy skips uriFreeUriMembersMmA on normalized_uri when it was never built or when it aliases the raw uri. Paired with the emalloc change: the struct used to be fully zeroed by ecalloc, so the free was safe without a guard. Now that most of the struct is uninitialized, the guard is required. No behavior change. All 309 tests in ext/uri/tests pass. I also checked that URIs which need normalization (http://EXAMPLE.com/A/%2e%2e/c resolving to /c) still hit the full normalize path, so the alias shortcut is gated by a non-zero dirty mask.
1 parent 8ad79e1 commit b5d6508

File tree

1 file changed

+69
-10
lines changed

1 file changed

+69
-10
lines changed

ext/uri/uri_parser_rfc3986.c

Lines changed: 69 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,12 @@
2525
struct php_uri_parser_rfc3986_uris {
2626
UriUriA uri;
2727
UriUriA normalized_uri;
28+
zend_long cached_port;
29+
unsigned int dirty_mask;
2830
bool normalized_uri_initialized;
31+
bool normalized_uri_is_alias;
32+
bool cached_port_valid;
33+
bool dirty_mask_valid;
2934
};
3035

3136
static void *php_uri_parser_rfc3986_memory_manager_malloc(UriMemoryManager *memory_manager, size_t size)
@@ -85,12 +90,30 @@ ZEND_ATTRIBUTE_NONNULL static void copy_uri(UriUriA *new_uriparser_uri, const Ur
8590

8691
ZEND_ATTRIBUTE_NONNULL static UriUriA *get_normalized_uri(php_uri_parser_rfc3986_uris *uriparser_uris) {
8792
if (!uriparser_uris->normalized_uri_initialized) {
93+
if (!uriparser_uris->dirty_mask_valid) {
94+
int mask_result = uriNormalizeSyntaxMaskRequiredExA(&uriparser_uris->uri, &uriparser_uris->dirty_mask);
95+
if (mask_result != URI_SUCCESS) {
96+
uriparser_uris->dirty_mask = (unsigned int)-1;
97+
}
98+
uriparser_uris->dirty_mask_valid = true;
99+
}
100+
101+
if (uriparser_uris->dirty_mask == 0) {
102+
uriparser_uris->normalized_uri_is_alias = true;
103+
uriparser_uris->normalized_uri_initialized = true;
104+
return &uriparser_uris->uri;
105+
}
106+
88107
copy_uri(&uriparser_uris->normalized_uri, &uriparser_uris->uri);
89-
int result = uriNormalizeSyntaxExMmA(&uriparser_uris->normalized_uri, (unsigned int)-1, mm);
108+
int result = uriNormalizeSyntaxExMmA(&uriparser_uris->normalized_uri, uriparser_uris->dirty_mask, mm);
90109
ZEND_ASSERT(result == URI_SUCCESS);
91110
uriparser_uris->normalized_uri_initialized = true;
92111
}
93112

113+
if (uriparser_uris->normalized_uri_is_alias) {
114+
return &uriparser_uris->uri;
115+
}
116+
94117
return &uriparser_uris->normalized_uri;
95118
}
96119

@@ -285,14 +308,18 @@ static zend_result php_uri_parser_rfc3986_host_write(void *uri, zval *value, zva
285308

286309
ZEND_ATTRIBUTE_NONNULL static zend_long port_str_to_zend_long_checked(const char *str, size_t len)
287310
{
288-
if (len > MAX_LENGTH_OF_LONG) {
311+
/* Caller guarantees str contains only ASCII digits (uriparser validates
312+
* portText during parsing). */
313+
if (UNEXPECTED(len == 0 || len > MAX_LENGTH_OF_LONG)) {
289314
return -1;
290315
}
291316

292-
char buf[MAX_LENGTH_OF_LONG + 1];
293-
*(char*)zend_mempcpy(buf, str, len) = 0;
294-
295-
zend_ulong result = ZEND_STRTOUL(buf, NULL, 10);
317+
zend_ulong result = 0;
318+
for (size_t i = 0; i < len; i++) {
319+
unsigned char digit = (unsigned char)(str[i] - '0');
320+
ZEND_ASSERT(digit <= 9);
321+
result = result * 10 + digit;
322+
}
296323

297324
if (result > ZEND_LONG_MAX) {
298325
return -1;
@@ -303,11 +330,27 @@ ZEND_ATTRIBUTE_NONNULL static zend_long port_str_to_zend_long_checked(const char
303330

304331
ZEND_ATTRIBUTE_NONNULL static zend_result php_uri_parser_rfc3986_port_read(void *uri, php_uri_component_read_mode read_mode, zval *retval)
305332
{
333+
php_uri_parser_rfc3986_uris *uriparser_uris = uri;
334+
335+
if (uriparser_uris->cached_port_valid) {
336+
if (uriparser_uris->cached_port >= 0) {
337+
ZVAL_LONG(retval, uriparser_uris->cached_port);
338+
} else {
339+
ZVAL_NULL(retval);
340+
}
341+
return SUCCESS;
342+
}
343+
306344
const UriUriA *uriparser_uri = get_uri_for_reading(uri, read_mode);
307345

308346
if (has_text_range(&uriparser_uri->portText) && get_text_range_length(&uriparser_uri->portText) > 0) {
309-
ZVAL_LONG(retval, port_str_to_zend_long_checked(uriparser_uri->portText.first, get_text_range_length(&uriparser_uri->portText)));
347+
zend_long port = port_str_to_zend_long_checked(uriparser_uri->portText.first, get_text_range_length(&uriparser_uri->portText));
348+
uriparser_uris->cached_port = port;
349+
uriparser_uris->cached_port_valid = true;
350+
ZVAL_LONG(retval, port);
310351
} else {
352+
uriparser_uris->cached_port = -1;
353+
uriparser_uris->cached_port_valid = true;
311354
ZVAL_NULL(retval);
312355
}
313356

@@ -316,9 +359,12 @@ ZEND_ATTRIBUTE_NONNULL static zend_result php_uri_parser_rfc3986_port_read(void
316359

317360
static zend_result php_uri_parser_rfc3986_port_write(void *uri, zval *value, zval *errors)
318361
{
362+
php_uri_parser_rfc3986_uris *uriparser_uris = uri;
319363
UriUriA *uriparser_uri = get_uri_for_writing(uri);
320364
int result;
321365

366+
uriparser_uris->cached_port_valid = false;
367+
322368
if (Z_TYPE_P(value) == IS_NULL) {
323369
result = uriSetPortTextMmA(uriparser_uri, NULL, NULL, mm);
324370
} else {
@@ -487,8 +533,11 @@ static zend_result php_uri_parser_rfc3986_fragment_write(void *uri, zval *value,
487533

488534
static php_uri_parser_rfc3986_uris *uriparser_create_uris(void)
489535
{
490-
php_uri_parser_rfc3986_uris *uriparser_uris = ecalloc(1, sizeof(*uriparser_uris));
536+
php_uri_parser_rfc3986_uris *uriparser_uris = emalloc(sizeof(*uriparser_uris));
491537
uriparser_uris->normalized_uri_initialized = false;
538+
uriparser_uris->normalized_uri_is_alias = false;
539+
uriparser_uris->cached_port_valid = false;
540+
uriparser_uris->dirty_mask_valid = false;
492541

493542
return uriparser_uris;
494543
}
@@ -545,18 +594,26 @@ php_uri_parser_rfc3986_uris *php_uri_parser_rfc3986_parse_ex(const char *uri_str
545594
/* Make the resulting URI independent of the 'uri_str'. */
546595
uriMakeOwnerMmA(&uri, mm);
547596

597+
zend_long parsed_port = -1;
598+
bool has_parsed_port = false;
548599
if (has_text_range(&uri.portText) && get_text_range_length(&uri.portText) > 0) {
549-
if (port_str_to_zend_long_checked(uri.portText.first, get_text_range_length(&uri.portText)) == -1) {
600+
parsed_port = port_str_to_zend_long_checked(uri.portText.first, get_text_range_length(&uri.portText));
601+
if (parsed_port == -1) {
550602
if (!silent) {
551603
zend_throw_exception(php_uri_ce_invalid_uri_exception, "The port is out of range", 0);
552604
}
553605

554606
goto fail;
555607
}
608+
has_parsed_port = true;
556609
}
557610

558611
php_uri_parser_rfc3986_uris *uriparser_uris = uriparser_create_uris();
559612
uriparser_uris->uri = uri;
613+
if (has_parsed_port) {
614+
uriparser_uris->cached_port = parsed_port;
615+
uriparser_uris->cached_port_valid = true;
616+
}
560617

561618
return uriparser_uris;
562619

@@ -626,7 +683,9 @@ static void php_uri_parser_rfc3986_destroy(void *uri)
626683
}
627684

628685
uriFreeUriMembersMmA(&uriparser_uris->uri, mm);
629-
uriFreeUriMembersMmA(&uriparser_uris->normalized_uri, mm);
686+
if (uriparser_uris->normalized_uri_initialized && !uriparser_uris->normalized_uri_is_alias) {
687+
uriFreeUriMembersMmA(&uriparser_uris->normalized_uri, mm);
688+
}
630689

631690
efree(uriparser_uris);
632691
}

0 commit comments

Comments
 (0)