From 508d61f8eec94397ea9415b35df52e4d7a5c0f18 Mon Sep 17 00:00:00 2001 From: nojimage Date: Wed, 12 Jul 2023 09:03:01 +0900 Subject: [PATCH] Don't count carriage return characters in end of line sequence --- lib/Twitter/Text/Parser.php | 1 + lib/Twitter/Text/StringUtils.php | 9 +++++++++ tests/TestCase/ParserTest.php | 20 ++++++++++++++++++++ tests/TestCase/StringUtilsTest.php | 11 +++++++++++ 4 files changed, 41 insertions(+) diff --git a/lib/Twitter/Text/Parser.php b/lib/Twitter/Text/Parser.php index 43fd851..9e8af61 100644 --- a/lib/Twitter/Text/Parser.php +++ b/lib/Twitter/Text/Parser.php @@ -61,6 +61,7 @@ public function parseTweet($tweet) return new ParseResults(); } + $tweet = StringUtils::normalizeLineFeed($tweet); $normalizedTweet = StringUtils::normalizeFromNFC($tweet); $normalizedTweetLength = StringUtils::strlen($normalizedTweet); diff --git a/lib/Twitter/Text/StringUtils.php b/lib/Twitter/Text/StringUtils.php index 784da92..d074c75 100644 --- a/lib/Twitter/Text/StringUtils.php +++ b/lib/Twitter/Text/StringUtils.php @@ -178,4 +178,13 @@ public static function charCount($string, $encoding = 'UTF-8') return $count; } + + /** + * @param string $string + * @return string + */ + public static function normalizeLineFeed(string $string): string + { + return str_replace("\r\n", "\n", $string); + } } diff --git a/tests/TestCase/ParserTest.php b/tests/TestCase/ParserTest.php index 45b9c47..a8c0486 100644 --- a/tests/TestCase/ParserTest.php +++ b/tests/TestCase/ParserTest.php @@ -243,4 +243,24 @@ public function testParseTweetWithEmojiNumberWithKeycapWithoutVariantSelector() $this->assertSame(0, $result->validRangeStart); $this->assertSame(1, $result->validRangeEnd); } + + /** + * test for parseTweet with Carriage Return characters + */ + public function testParseTweetWithCarriageReturn(): void + { + // @codingStandardsIgnoreStart + $text = "We're expanding the character limit! We want it to be easier and faster for everyone to express themselves.\r\n\r\nMore characters. More expression. More of what's happening.\r\nhttps://cards.twitter.com/cards/gsby/4ztbu"; + // @codingStandardsIgnoreEnd + $result = $this->parser->parseTweet($text); + + $this->assertInstanceOf('\Twitter\Text\ParseResults', $result); + $this->assertSame(192, $result->weightedLength); + $this->assertSame(685, $result->permillage); + $this->assertSame(true, $result->valid); + $this->assertSame(0, $result->displayRangeStart); + $this->assertSame(210, $result->displayRangeEnd); + $this->assertSame(0, $result->validRangeStart); + $this->assertSame(210, $result->validRangeEnd); + } } diff --git a/tests/TestCase/StringUtilsTest.php b/tests/TestCase/StringUtilsTest.php index c636a08..7a6c814 100644 --- a/tests/TestCase/StringUtilsTest.php +++ b/tests/TestCase/StringUtilsTest.php @@ -39,4 +39,15 @@ public function testCharCountEmoji() $this->assertSame(4, StringUtils::charCount('🧕🏾'), 'U+1F9D5 U+1F3FE woman with headscarf with medium-dark skin tone has 4 code point'); $this->assertSame(14, StringUtils::charCount('🏴󠁧󠁢󠁥󠁮󠁧󠁿'), 'flag (England) has 14 code point'); } + + /** + * Test for strip carriage return characters + * + * @return void + */ + public function testNormalizeLineFeed(): void + { + $this->assertSame("foo\nbar\n", StringUtils::normalizeLineFeed("foo\r\nbar\r\n"), "Strip CR and leave LF"); + $this->assertSame("foo\rbar", StringUtils::normalizeLineFeed("foo\rbar"), "Do not strip CR only"); + } }