Skip to content

Commit b6ff441

Browse files
Alexey PortnovAlexey Portnov
authored andcommitted
fix(syslogs): bound memory in LogTimestampParser::readLastLines (#1021)
Sentry #27765: fatal OOM (`Allowed memory size of 268435456 bytes exhausted`) in WorkerApiCommands when the syslog:getLogTimeRange API encountered a log file with no newline characters. The backward chunk reader accumulated 4 KiB per iteration into $chunk forever because explode("\n", $chunk) returned a single-element array on every pass. Rewrite readLastLines() to: - Cap accumulated $chunk at READ_LAST_LINES_MAX_BUFFER (1 MiB) and bail out cleanly on binary / single-line files. - Short-circuit files <= 64 KiB through file(FILE_IGNORE_NEW_LINES), which is faster and avoids the seek state machine entirely. - Track position via a local integer and SEEK_SET instead of fseek(-mb_strlen($chunk), SEEK_CUR), which was a pre-existing bug: on iterations after the first it sent the read pointer past the start of the previously-read region, causing partial-line overlap and skipped bytes between iterations. - Virtually clip a single trailing '\n' so explode() does not yield a spurious empty element that would consume one of the requested line slots. - Drop the never-firing `fseek(-1, SEEK_END); fread(1); fgets()` branch (after fread the position was at EOF, so fgets always returned false). Add tests/PBXCoreREST/SysLogs/LogTimestampParserTest.php with 13 PHPUnit cases (reflection-based unit tests for the private method plus end-to-end via getLogTimeRange): - empty/missing file, zero lines requested - small file with/without trailing newline, tail-limit - large file (>64 KiB) backward-seek path - large file with 5000-byte lines crossing the 4 KiB internal buffer (regression for the seek arithmetic bug) - 4 MiB file with no newlines must not exhaust memory: peak delta asserted < 6 MiB after memory_reset_peak_usage(), measured ~2 MiB - large file with multiple trailing blank lines preserves blanks matching file(FILE_IGNORE_NEW_LINES) semantics - getLogTimeRange end-to-end on normal asterisk format and on garbage binary file (returns null start/end without crashing)
1 parent da3c257 commit b6ff441

2 files changed

Lines changed: 295 additions & 18 deletions

File tree

src/PBXCoreREST/Lib/SysLogs/LogTimestampParser.php

Lines changed: 63 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,19 @@ private static function getLastTimestamp(string $filename): ?int
207207
return null;
208208
}
209209

210+
/**
211+
* Maximum buffered chunk size for backward reading.
212+
* Files without newlines (binary, single-line JSON, corrupted logs) would otherwise
213+
* grow $chunk unbounded and OOM the worker. 1 MiB is far above any plausible log line.
214+
*/
215+
private const READ_LAST_LINES_MAX_BUFFER = 1048576;
216+
217+
/**
218+
* Files at or below this size are read whole instead of doing backward chunk seeks —
219+
* simpler, faster, and avoids the entire failure mode of the seeking algorithm.
220+
*/
221+
private const READ_LAST_LINES_SMALL_FILE_THRESHOLD = 65536;
222+
210223
/**
211224
* Read last N lines from file efficiently
212225
*
@@ -216,43 +229,75 @@ private static function getLastTimestamp(string $filename): ?int
216229
*/
217230
private static function readLastLines(string $filename, int $lines): array
218231
{
219-
$handle = fopen($filename, 'r');
220-
if ($handle === false) {
232+
$size = @filesize($filename);
233+
if ($size === false || $size === 0 || $lines <= 0) {
221234
return [];
222235
}
223236

224-
$buffer = 4096;
225-
$output = [];
237+
// Small file: just read it whole. file() handles trailing newlines correctly
238+
// and avoids the backward-seek state machine entirely.
239+
if ($size <= self::READ_LAST_LINES_SMALL_FILE_THRESHOLD) {
240+
$allLines = @file($filename, FILE_IGNORE_NEW_LINES);
241+
if ($allLines === false) {
242+
return [];
243+
}
244+
return array_slice($allLines, -$lines);
245+
}
226246

227-
fseek($handle, -1, SEEK_END);
247+
$handle = @fopen($filename, 'rb');
248+
if ($handle === false) {
249+
return [];
250+
}
228251

229-
if (fread($handle, 1) !== "\n") {
230-
$line = fgets($handle);
231-
if ($line !== false) {
232-
$output[] = $line;
233-
}
252+
// If the file ends with '\n', virtually clip it so that explode() won't produce
253+
// a spurious empty trailing element that would consume one of the requested slots.
254+
// Matches file()/FILE_IGNORE_NEW_LINES semantics for normal log files.
255+
fseek($handle, -1, SEEK_END);
256+
if (fread($handle, 1) === "\n") {
257+
$size--;
234258
}
235259

260+
$output = [];
236261
$chunk = '';
262+
$pos = $size;
263+
$bufferSize = 4096;
237264

238-
while (ftell($handle) > 0 && count($output) < $lines) {
239-
$seek = min(ftell($handle), $buffer);
240-
fseek($handle, -$seek, SEEK_CUR);
241-
$chunk = fread($handle, $seek) . $chunk;
242-
fseek($handle, -mb_strlen($chunk, '8bit'), SEEK_CUR);
265+
while ($pos > 0 && count($output) < $lines) {
266+
// Bail out on files without newlines (binary data, single huge JSON, etc.)
267+
// before $chunk grows large enough to exhaust memory_limit. See issue #1021.
268+
if (strlen($chunk) > self::READ_LAST_LINES_MAX_BUFFER) {
269+
break;
270+
}
243271

272+
$readSize = (int)min($pos, $bufferSize);
273+
$pos -= $readSize;
274+
if (fseek($handle, $pos, SEEK_SET) !== 0) {
275+
break;
276+
}
277+
$data = fread($handle, $readSize);
278+
if ($data === false) {
279+
break;
280+
}
281+
282+
$chunk = $data . $chunk;
244283
$linesArray = explode("\n", $chunk);
284+
// First element may be a partial line whose start lives in the not-yet-read part
285+
// of the file. Stash it for the next iteration; the rest are complete lines.
245286
$chunk = array_shift($linesArray);
246-
247287
$output = array_merge($linesArray, $output);
248288
}
249289

250-
if (count($output) >= $lines) {
251-
$output = array_slice($output, -$lines);
290+
// Reached the start of the file: $chunk now holds the very first line.
291+
if ($pos === 0 && $chunk !== '') {
292+
array_unshift($output, $chunk);
252293
}
253294

254295
fclose($handle);
255296

297+
if (count($output) > $lines) {
298+
$output = array_slice($output, -$lines);
299+
}
300+
256301
return $output;
257302
}
258303

Lines changed: 232 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,232 @@
1+
<?php
2+
/*
3+
* MikoPBX - free phone system for small business
4+
* Copyright © 2017-2026 Alexey Portnov and Nikolay Beketov
5+
*
6+
* This program is free software: you can redistribute it and/or modify
7+
* it under the terms of the GNU General Public License as published by
8+
* the Free Software Foundation; either version 3 of the License, or
9+
* (at your option) any later version.
10+
*
11+
* This program is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14+
* GNU General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU General Public License along with this program.
17+
* If not, see <https://www.gnu.org/licenses/>.
18+
*/
19+
20+
declare(strict_types=1);
21+
22+
namespace MikoPBX\Tests\PBXCoreREST\SysLogs;
23+
24+
use MikoPBX\PBXCoreREST\Lib\SysLogs\LogTimestampParser;
25+
use PHPUnit\Framework\TestCase;
26+
use ReflectionClass;
27+
28+
/**
29+
* Unit tests for LogTimestampParser, in particular the regression for issue #1021
30+
* (OOM in readLastLines when the file contains no newline characters).
31+
*
32+
* Tests use reflection to call the private readLastLines() method directly so the
33+
* algorithm can be exercised without standing up a full DI container.
34+
*/
35+
class LogTimestampParserTest extends TestCase
36+
{
37+
private string $tmpDir;
38+
39+
protected function setUp(): void
40+
{
41+
parent::setUp();
42+
$this->tmpDir = sys_get_temp_dir() . '/log_ts_parser_' . bin2hex(random_bytes(4));
43+
mkdir($this->tmpDir);
44+
}
45+
46+
protected function tearDown(): void
47+
{
48+
foreach (glob($this->tmpDir . '/*') ?: [] as $file) {
49+
@unlink($file);
50+
}
51+
@rmdir($this->tmpDir);
52+
parent::tearDown();
53+
}
54+
55+
/**
56+
* Invoke the private readLastLines method via reflection.
57+
*
58+
* @return array<int, string>
59+
*/
60+
private function callReadLastLines(string $filename, int $lines): array
61+
{
62+
$reflection = new ReflectionClass(LogTimestampParser::class);
63+
$method = $reflection->getMethod('readLastLines');
64+
$method->setAccessible(true);
65+
return $method->invoke(null, $filename, $lines);
66+
}
67+
68+
private function writeFile(string $name, string $content): string
69+
{
70+
$path = $this->tmpDir . '/' . $name;
71+
file_put_contents($path, $content);
72+
return $path;
73+
}
74+
75+
public function testMissingFileReturnsEmpty(): void
76+
{
77+
self::assertSame([], $this->callReadLastLines($this->tmpDir . '/nope.log', 10));
78+
}
79+
80+
public function testEmptyFileReturnsEmpty(): void
81+
{
82+
$path = $this->writeFile('empty.log', '');
83+
self::assertSame([], $this->callReadLastLines($path, 10));
84+
}
85+
86+
public function testZeroLinesRequestedReturnsEmpty(): void
87+
{
88+
$path = $this->writeFile('basic.log', "a\nb\nc\n");
89+
self::assertSame([], $this->callReadLastLines($path, 0));
90+
}
91+
92+
public function testSmallFileWithTrailingNewline(): void
93+
{
94+
$path = $this->writeFile('small.log', "line1\nline2\nline3\n");
95+
self::assertSame(['line1', 'line2', 'line3'], $this->callReadLastLines($path, 10));
96+
}
97+
98+
public function testSmallFileWithoutTrailingNewline(): void
99+
{
100+
$path = $this->writeFile('small.log', "line1\nline2\nline3");
101+
self::assertSame(['line1', 'line2', 'line3'], $this->callReadLastLines($path, 10));
102+
}
103+
104+
public function testSmallFileLimitsToRequestedTail(): void
105+
{
106+
$content = '';
107+
for ($i = 1; $i <= 50; $i++) {
108+
$content .= "line{$i}\n";
109+
}
110+
$path = $this->writeFile('tail.log', $content);
111+
112+
$result = $this->callReadLastLines($path, 5);
113+
self::assertSame(['line46', 'line47', 'line48', 'line49', 'line50'], $result);
114+
}
115+
116+
public function testLargeFileBackwardSeekPath(): void
117+
{
118+
// Build a file safely above the 64 KiB small-file threshold so the backward
119+
// seeking path is exercised. 5000 short lines ≈ 50 KiB per 1k lines, so 5k lines ≈ 250 KiB.
120+
$content = '';
121+
for ($i = 1; $i <= 5000; $i++) {
122+
$content .= sprintf("[2025-10-09 08:21:%02d] line%05d payload\n", $i % 60, $i);
123+
}
124+
$path = $this->writeFile('big.log', $content);
125+
self::assertGreaterThan(65536, filesize($path));
126+
127+
$result = $this->callReadLastLines($path, 100);
128+
self::assertCount(100, $result);
129+
self::assertStringContainsString('line04901', $result[0]);
130+
self::assertStringContainsString('line05000', $result[99]);
131+
}
132+
133+
public function testLargeFileWithLongLinesCrossingBufferBoundary(): void
134+
{
135+
// Lines longer than the 4 KiB internal buffer to verify the partial-line
136+
// accumulation across iterations works correctly after the seek fix.
137+
$longPayload = str_repeat('x', 5000);
138+
$content = '';
139+
for ($i = 1; $i <= 50; $i++) {
140+
$content .= "tag{$i}|{$longPayload}\n";
141+
}
142+
$path = $this->writeFile('long_lines.log', $content);
143+
self::assertGreaterThan(65536, filesize($path));
144+
145+
$result = $this->callReadLastLines($path, 3);
146+
self::assertCount(3, $result);
147+
self::assertStringStartsWith('tag48|', $result[0]);
148+
self::assertStringStartsWith('tag49|', $result[1]);
149+
self::assertStringStartsWith('tag50|', $result[2]);
150+
self::assertSame(strlen("tag50|") + 5000, strlen($result[2]));
151+
}
152+
153+
/**
154+
* Regression for issue #1021: a multi-megabyte file containing no '\n' must NOT
155+
* exhaust memory_limit. Before the fix, $chunk grew by 4 KiB per iteration until
156+
* it consumed the entire file. The guard caps it at READ_LAST_LINES_MAX_BUFFER (1 MiB).
157+
*/
158+
public function testFileWithoutNewlinesDoesNotExhaustMemory(): void
159+
{
160+
// 4 MiB of 'A' with zero newlines — the worst case from the Sentry crash.
161+
$payload = str_repeat('A', 4 * 1024 * 1024);
162+
$path = $this->writeFile('binaryish.log', $payload);
163+
164+
// Reset the process-wide peak so prior fixtures in this run can't mask the leak.
165+
// PHP 8.2+ — the project requires 8.3.
166+
memory_reset_peak_usage();
167+
$memBefore = memory_get_usage();
168+
$result = $this->callReadLastLines($path, 100);
169+
$memPeak = memory_get_peak_usage();
170+
171+
// Should bail cleanly without OOM, not crash. Result content is best-effort
172+
// (algorithm cannot recover lines from a file with no separators).
173+
self::assertIsArray($result);
174+
175+
// Critically: peak memory growth must stay under a few MiB, not climb to 4+ MiB
176+
// (the file size). The 1 MiB chunk cap plus the 4 KiB read buffer plus PHP's
177+
// per-string overhead leaves ~3 MiB of headroom; we assert 6 MiB to absorb
178+
// explode()'s element-array allocations on small fixtures.
179+
self::assertLessThan(
180+
6 * 1024 * 1024,
181+
$memPeak - $memBefore,
182+
'readLastLines leaked memory while scanning a newline-free file'
183+
);
184+
}
185+
186+
/**
187+
* file()/FILE_IGNORE_NEW_LINES preserves blank lines in the middle and (importantly)
188+
* blank lines at the end. Our backward-seek implementation should match.
189+
*/
190+
public function testLargeFileWithMultipleTrailingBlankLines(): void
191+
{
192+
// 18 KiB filler so the file crosses the 64 KiB threshold and the backward
193+
// seek path is exercised.
194+
$filler = str_repeat("filler line keeps file above the small-file threshold\n", 1500);
195+
$content = $filler . "alpha\nbeta\n\n\n";
196+
$path = $this->writeFile('trailing_blanks.log', $content);
197+
self::assertGreaterThan(65536, filesize($path));
198+
199+
$result = $this->callReadLastLines($path, 5);
200+
201+
// The trailing single \n is the file terminator (not a content line).
202+
// The two preceding \n produce two empty content lines, matching file().
203+
self::assertSame(['filler line keeps file above the small-file threshold', 'alpha', 'beta', '', ''], $result);
204+
}
205+
206+
public function testGetLogTimeRangeOnNormalFile(): void
207+
{
208+
$content = <<<LOG
209+
[2025-10-09 08:00:00] start of log
210+
[2025-10-09 08:15:32] some entry
211+
[2025-10-09 09:30:45] another entry
212+
[2025-10-09 10:00:00] end of log
213+
214+
LOG;
215+
$path = $this->writeFile('asterisk.log', $content);
216+
217+
$range = LogTimestampParser::getLogTimeRange($path);
218+
self::assertSame(strtotime('2025-10-09 08:00:00'), $range['start']);
219+
self::assertSame(strtotime('2025-10-09 10:00:00'), $range['end']);
220+
}
221+
222+
public function testGetLogTimeRangeOnFileWithoutNewlinesReturnsNullsCleanly(): void
223+
{
224+
// 2 MiB of garbage without newlines — must not crash even though
225+
// no timestamp can be extracted.
226+
$path = $this->writeFile('garbage.log', str_repeat("\x01\x02\x03\x04", 512 * 1024));
227+
228+
$range = LogTimestampParser::getLogTimeRange($path);
229+
self::assertNull($range['start']);
230+
self::assertNull($range['end']);
231+
}
232+
}

0 commit comments

Comments
 (0)