From 9a0fe35a710e6c105eb9bec3fec68224401a5243 Mon Sep 17 00:00:00 2001 From: LxL Date: Sun, 8 Dec 2024 21:55:27 +0800 Subject: [PATCH 01/21] Fix config.yml for CircleCI and test py313 --- .circleci/config.yml | 35 ++++++++++++++++++++++++----------- setup.py | 9 ++++++--- 2 files changed, 30 insertions(+), 14 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index b16630a..7927387 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -5,9 +5,9 @@ orbs: codecov: codecov/codecov@1.0.2 jobs: - py37: &test-template + py39: &test-template docker: - - image: circleci/python:3.7 + - image: cimg/python:3.9 working_directory: ~/repo @@ -77,23 +77,36 @@ jobs: py27: <<: *test-template docker: - - image: circleci/python:2.7 - - py36: + - image: cimg/python:2.7 + py38: <<: *test-template docker: - - image: circleci/python:3.6 - - py38: + - image: cimg/python:3.8 + py310: + <<: *test-template + docker: + - image: cimg/python:3.10 + py311: + <<: *test-template + docker: + - image: cimg/python:3.11 + py312: + <<: *test-template + docker: + - image: cimg/python:3.12 + py312: <<: *test-template docker: - - image: circleci/python:3.8 + - image: cimg/python:3.13 workflows: test: jobs: - py27: is-py27: true - - py36 - - py37 - py38 + - py39 + - py310 + - py311 + - py312 + - py313 diff --git a/setup.py b/setup.py index 3186a41..eff3943 100644 --- a/setup.py +++ b/setup.py @@ -45,8 +45,11 @@ classifiers=[ "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", - "Programming Language :: Python :: 2.7", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7" + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", ] ) From a0729cba882c82f8a649b7ca277809009ca03516 Mon Sep 17 00:00:00 2001 From: LxL Date: Sun, 8 Dec 2024 21:55:56 +0800 Subject: [PATCH 02/21] Fix parse_crawler_stats for None value --- logparser/common.py | 3 +++ logparser/telnet.py | 2 ++ 2 files changed, 5 insertions(+) diff --git a/logparser/common.py b/logparser/common.py index ae25711..dd6e0b3 100644 --- a/logparser/common.py +++ b/logparser/common.py @@ -179,11 +179,14 @@ def parse_crawler_stats(text): # self.crawler.stats.inc_value( # 'crawlera/response/error/%s' % crawlera_error.decode('utf8')) # u"crawlera/response/error/timeout": 1 + # 'items_per_minute': None, + # 'responses_per_minute': None, backup = text text = re.sub(r'(datetime.datetime\(.+?\))', r'"\1"', text) text = re.sub(r'(".*?)\'(.*?)\'(.*?")', r'\1_\2_\3', text) text = re.sub(r"'(.+?)'", r'"\1"', text) text = re.sub(r'[bu]"(.+?)"', r'"\1"', text) + text = re.sub(r': None([,}])', r': null\1', text) try: return json.loads(text) except ValueError as err: diff --git a/logparser/telnet.py b/logparser/telnet.py index 93b1a27..51c8599 100644 --- a/logparser/telnet.py +++ b/logparser/telnet.py @@ -148,6 +148,8 @@ def parse_output(self, text): result[k] = True elif v == 'False': result[k] = False + elif v == 'None': + result[k] = None else: try: result[k] = int(float(v)) From 158219f72f59347c546c4b1ed93fd2e054dbdde8 Mon Sep 17 00:00:00 2001 From: LxL Date: Sun, 8 Dec 2024 21:57:00 +0800 Subject: [PATCH 03/21] Update testcases in test_telnet --- tests/test_telnet.py | 23 ++++++++++++++++++++--- tests/test_utils.py | 5 ++++- tests/utils.py | 2 +- 3 files changed, 25 insertions(+), 5 deletions(-) diff --git a/tests/test_telnet.py b/tests/test_telnet.py index ee3d126..21430be 100644 --- a/tests/test_telnet.py +++ b/tests/test_telnet.py @@ -18,15 +18,29 @@ def test_telnet(psr): cwd = os.getcwd() os.chdir(cst.DEMO_PROJECT_PATH) + print(os.getcwd()) + # ['1.4.0', '1.5.0', '1.5.1', '1.5.2', '1.6.0', 'latest'] + # Ref: https://github.com/scrapy/scrapy/issues/6024 + # "We just released 2.10.1 with the Twisted version restricted as a workaround for this." + # Ref: https://github.com/scrapy/scrapy/pull/6064 + # Fixes #6024 try: - for version in ['1.4.0', '1.5.0', '1.5.1', '1.5.2', '1.6.0', 'latest']: + # scrapyd 1.4.3 requires scrapy>=2.0.0 + cst.sub_process('pip uninstall -y scrapyd', block=True) + cst.sub_process('pip uninstall -y scrapy', block=True) + # ['2.0.0', '2.10.1', 'latest']: + for version in ['2.10.1', 'latest', '1.5.1']: if version == 'latest': cmd = 'pip install --upgrade scrapy' else: + # cst.sub_process('pip uninstall -y Twisted', block=True) + if version < '2.10.1': + cst.sub_process('pip install Twisted==20.3.0', block=True) cmd = 'pip install scrapy==%s' % version cst.sub_process(cmd, block=True) log_file = os.path.join(cst.DEMO_PROJECT_LOG_FOLDER_PATH, 'scrapy_%s.log' % version) cmd = 'scrapy crawl example -s CLOSESPIDER_TIMEOUT=20 -s LOG_FILE=%s' % log_file + print(cmd) if version == '1.5.0': cmd += ' -s TELNETCONSOLE_ENABLED=False' elif version == '1.5.2': @@ -88,8 +102,11 @@ def test_disable_telnet(psr): cwd = os.getcwd() os.chdir(cst.DEMO_PROJECT_PATH) try: - version = '1.5.1' if (cst.ON_WINDOWS or on_fedora) else '1.6.0' - cmd = 'pip install scrapy==%s' % version + if (cst.ON_WINDOWS or on_fedora): + cmd = 'pip install scrapy==%s' % '1.5.1' + else: + cmd = 'pip install --upgrade scrapy' + print(cmd) cst.sub_process(cmd, block=True) for name in ['enable_telnet', 'disable_telnet']: enable_telnet = name == 'enable_telnet' diff --git a/tests/test_utils.py b/tests/test_utils.py index 7b46f79..0345adf 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -11,7 +11,10 @@ def test_run_py(): def test_check_update(): js = check_update(timeout=60) - assert 'latest_version' in js and 'info' in js + if js: + assert 'latest_version' in js and 'info' in js + else: + print('Got empty js.') def test_main_pid_exit(psr): diff --git a/tests/utils.py b/tests/utils.py index e4c166f..f5dc55a 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -154,7 +154,7 @@ def string_to_timestamp(string): def timestamp_to_string(timestamp): return datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d %H:%M:%S') - def sub_process(self, args, block=False, timeout=60): + def sub_process(self, args, block=False, timeout=120): proc = Popen(args.split()) if block: # TODO: In PY2: TypeError: communicate() got an unexpected keyword argument 'timeout' From 913337066c3145c80f7f3137d35c61dd5b7a4c1f Mon Sep 17 00:00:00 2001 From: LxL Date: Sun, 8 Dec 2024 22:06:17 +0800 Subject: [PATCH 04/21] Fix config.yml --- .circleci/config.yml | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 7927387..7886559 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,29 +1,22 @@ # Python CircleCI 2.1 configuration file version: 2.1 - orbs: codecov: codecov/codecov@1.0.2 - jobs: py39: &test-template docker: - image: cimg/python:3.9 - working_directory: ~/repo - parameters: is-py27: type: boolean default: false - steps: - run: name: Install telnet command: | sudo apt-get update && sudo apt-get install telnet - - checkout - - when: condition: <> steps: @@ -38,7 +31,6 @@ jobs: name: Create virtual env in PY3 command: | python3 -m venv venv - - run: name: Install dependencies command: | @@ -49,7 +41,6 @@ jobs: python --version pip install -r requirements.txt pip install -r requirements-tests.txt - - run: name: Run tests command: | @@ -73,7 +64,6 @@ jobs: path: coverage.xml - codecov/upload: file: coverage.xml - py27: <<: *test-template docker: @@ -94,11 +84,10 @@ jobs: <<: *test-template docker: - image: cimg/python:3.12 - py312: + py313: <<: *test-template docker: - image: cimg/python:3.13 - workflows: test: jobs: From 8bba648c1cfb5981c0cd015156770ba108a192b1 Mon Sep 17 00:00:00 2001 From: LxL Date: Sun, 8 Dec 2024 22:09:03 +0800 Subject: [PATCH 05/21] Update test_telnet.py --- tests/test_telnet.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/test_telnet.py b/tests/test_telnet.py index 21430be..f22a6af 100644 --- a/tests/test_telnet.py +++ b/tests/test_telnet.py @@ -103,8 +103,10 @@ def test_disable_telnet(psr): os.chdir(cst.DEMO_PROJECT_PATH) try: if (cst.ON_WINDOWS or on_fedora): - cmd = 'pip install scrapy==%s' % '1.5.1' + version = '1.5.1' + cmd = 'pip install scrapy==%s' % version else: + version = None cmd = 'pip install --upgrade scrapy' print(cmd) cst.sub_process(cmd, block=True) @@ -138,7 +140,8 @@ def test_disable_telnet(psr): time.sleep(30) parser.main() log_data = cst.read_data(re.sub(r'.log$', '.json', log_file)) - assert log_data['latest_matches']['scrapy_version'] == version + if version: + assert log_data['latest_matches']['scrapy_version'] == version assert log_data['latest_matches']['telnet_console'] assert log_data['crawler_stats']['source'] == 'log' if enable_telnet: From 51ef9680df5e1120293ffe855ebe8eceef094dd5 Mon Sep 17 00:00:00 2001 From: LxL Date: Sun, 15 Dec 2024 13:51:23 +0800 Subject: [PATCH 06/21] Update test_telnet.py again --- logparser/__version__.py | 2 +- tests/test_telnet.py | 34 +++++++++++++++++----------------- tests/utils.py | 6 ++++-- 3 files changed, 22 insertions(+), 20 deletions(-) diff --git a/logparser/__version__.py b/logparser/__version__.py index ab1ec3b..d5f912f 100644 --- a/logparser/__version__.py +++ b/logparser/__version__.py @@ -1,7 +1,7 @@ # coding: utf-8 __title__ = 'logparser' -__version__ = '0.8.2' +__version__ = '0.8.3' __author__ = 'my8100' __author_email__ = 'my8100@gmail.com' __url__ = 'https://github.com/my8100/logparser' diff --git a/tests/test_telnet.py b/tests/test_telnet.py index f22a6af..eaade0c 100644 --- a/tests/test_telnet.py +++ b/tests/test_telnet.py @@ -17,6 +17,7 @@ def test_telnet(psr): parser = psr(execute_main=False) cwd = os.getcwd() + print(cwd) os.chdir(cst.DEMO_PROJECT_PATH) print(os.getcwd()) # ['1.4.0', '1.5.0', '1.5.1', '1.5.2', '1.6.0', 'latest'] @@ -28,24 +29,24 @@ def test_telnet(psr): # scrapyd 1.4.3 requires scrapy>=2.0.0 cst.sub_process('pip uninstall -y scrapyd', block=True) cst.sub_process('pip uninstall -y scrapy', block=True) - # ['2.0.0', '2.10.1', 'latest']: - for version in ['2.10.1', 'latest', '1.5.1']: + # ['2.0.0', '2.10.1', 'latest'] + # ['2.10.1', 'latest', '1.5.1'] + for version in ['latest', '2.10.1']: if version == 'latest': - cmd = 'pip install --upgrade scrapy' + pip_cmd = 'pip install --upgrade scrapy' else: # cst.sub_process('pip uninstall -y Twisted', block=True) if version < '2.10.1': cst.sub_process('pip install Twisted==20.3.0', block=True) - cmd = 'pip install scrapy==%s' % version - cst.sub_process(cmd, block=True) + pip_cmd = 'pip install scrapy==%s' % version + cst.sub_process(pip_cmd, block=True) log_file = os.path.join(cst.DEMO_PROJECT_LOG_FOLDER_PATH, 'scrapy_%s.log' % version) - cmd = 'scrapy crawl example -s CLOSESPIDER_TIMEOUT=20 -s LOG_FILE=%s' % log_file - print(cmd) + scrapy_cmd = 'scrapy crawl example -s CLOSESPIDER_TIMEOUT=20 -s LOG_FILE=%s' % log_file if version == '1.5.0': - cmd += ' -s TELNETCONSOLE_ENABLED=False' + scrapy_cmd += ' -s TELNETCONSOLE_ENABLED=False' elif version == '1.5.2': - cmd += ' -s TELNETCONSOLE_USERNAME=usr123 -s TELNETCONSOLE_PASSWORD=psw456' - proc = cst.sub_process(cmd) + scrapy_cmd += ' -s TELNETCONSOLE_USERNAME=usr123 -s TELNETCONSOLE_PASSWORD=psw456' + proc = cst.sub_process(scrapy_cmd) time.sleep(10) if version == '1.4.0': @@ -102,14 +103,13 @@ def test_disable_telnet(psr): cwd = os.getcwd() os.chdir(cst.DEMO_PROJECT_PATH) try: - if (cst.ON_WINDOWS or on_fedora): + if cst.ON_WINDOWS or on_fedora: version = '1.5.1' - cmd = 'pip install scrapy==%s' % version + pip_cmd = 'pip install scrapy==%s' % version else: version = None - cmd = 'pip install --upgrade scrapy' - print(cmd) - cst.sub_process(cmd, block=True) + pip_cmd = 'pip install --upgrade scrapy' + cst.sub_process(pip_cmd, block=True) for name in ['enable_telnet', 'disable_telnet']: enable_telnet = name == 'enable_telnet' parser = psr(execute_main=False, enable_telnet=enable_telnet) @@ -120,8 +120,8 @@ def test_disable_telnet(psr): cst.write_text(_log_file, TELNET_151_PORT_16023.replace(':16023', ':%s' % _name)) log_file = os.path.join(cst.DEMO_PROJECT_LOG_FOLDER_PATH, '%s.log' % name) - cmd = 'scrapy crawl example -s CLOSESPIDER_TIMEOUT=40 -s LOG_FILE=%s' % log_file - cst.sub_process(cmd) + scrapy_cmd = 'scrapy crawl example -s CLOSESPIDER_TIMEOUT=40 -s LOG_FILE=%s' % log_file + cst.sub_process(scrapy_cmd) time.sleep(10) parser.main() if enable_telnet: diff --git a/tests/utils.py b/tests/utils.py index f5dc55a..8c53c13 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -154,8 +154,10 @@ def string_to_timestamp(string): def timestamp_to_string(timestamp): return datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d %H:%M:%S') - def sub_process(self, args, block=False, timeout=120): - proc = Popen(args.split()) + def sub_process(self, cmd, block=False, timeout=120): + print(cmd) + args = cmd.split() + proc = Popen(args) if block: # TODO: In PY2: TypeError: communicate() got an unexpected keyword argument 'timeout' if self.PY2: From aaba94cd7380dd23ba52de1743db06eebe34e3cd Mon Sep 17 00:00:00 2001 From: LxL Date: Sun, 15 Dec 2024 15:03:21 +0800 Subject: [PATCH 07/21] Fix telnet issue for Python 3.13 --- logparser/telnet.py | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/logparser/telnet.py b/logparser/telnet.py index 51c8599..b48e4be 100644 --- a/logparser/telnet.py +++ b/logparser/telnet.py @@ -5,7 +5,11 @@ import platform import re import sys -from telnetlib import DO, DONT, IAC, SB, SE, Telnet, TTYPE, WILL, WONT +# DeprecationWarning: 'telnetlib' is deprecated and slated for removal in Python 3.13 +try: + import telnetlib +except ImportError: + telnetlib = None import traceback import pexpect @@ -95,7 +99,7 @@ def run(self): self.host = self.OVERRIDE_TELNET_CONSOLE_HOST or self.host self.logger.debug("Try to telnet to %s:%s for %s", self.host, self.port, self.data['log_path']) - if self.telnet_password: + if self.telnet_password or telnetlib is None: self.setup_pexpect() if self.tn is not None: self.pexpect_io() @@ -117,16 +121,16 @@ def setup_pexpect(self): @staticmethod def telnet_callback(tn, command, option): - if command == DO and option == TTYPE: - tn.sendall(IAC + WILL + TTYPE) - tn.sendall(IAC + SB + TTYPE + '\0' + 'LogParser' + IAC + SE) - elif command in (DO, DONT): - tn.sendall(IAC + WILL + option) - elif command in (WILL, WONT): - tn.sendall(IAC + DO + option) + if command == telnetlib.DO and option == telnetlib.TTYPE: + tn.sendall(telnetlib.IAC + telnetlib.WILL + telnetlib.TTYPE) + tn.sendall(telnetlib.IAC + telnetlib.SB + telnetlib.TTYPE + '\0' + 'LogParser' + telnetlib.IAC + telnetlib.SE) + elif command in (telnetlib.DO, telnetlib.DONT): + tn.sendall(telnetlib.IAC + telnetlib.WILL + option) + elif command in (telnetlib.WILL, telnetlib.WONT): + tn.sendall(telnetlib.IAC + telnetlib.DO + option) def setup_telnet(self): - self.tn = Telnet(self.host, int(self.port), timeout=TELNET_TIMEOUT) + self.tn = telnetlib.Telnet(self.host, int(self.port), timeout=TELNET_TIMEOUT) # [twisted] CRITICAL: Unhandled Error # Failure: twisted.conch.telnet.OptionRefused: twisted.conch.telnet.OptionRefused # https://github.com/jookies/jasmin-web/issues/2 @@ -167,11 +171,14 @@ def bytes_to_str(src): return src.decode('utf-8') # TypeError: got ('Username: ') as pattern, # must be one of: , pexpect.EOF, pexpect.TIMEOUT - self.tn.expect(u'Username: ', timeout=TELNET_TIMEOUT) - self.tn.sendline(self.telnet_username) - self.tn.expect(u'Password: ', timeout=TELNET_TIMEOUT) - self.tn.sendline(self.telnet_password) - self.tn.expect(u'>>>', timeout=TELNET_TIMEOUT) + try: + self.tn.expect(u'Username: ', timeout=TELNET_TIMEOUT) + self.tn.sendline(self.telnet_username) + self.tn.expect(u'Password: ', timeout=TELNET_TIMEOUT) + self.tn.sendline(self.telnet_password) + self.tn.expect(u'>>>', timeout=TELNET_TIMEOUT) + except Exception as err: + self.logger.warning("Found error in pexpect_io: %s" % err) self.tn.sendline(bytes_to_str(TELNETCONSOLE_COMMAND_MAP['log_file'])) self.tn.expect(re.compile(r'[\'"].+>>>', re.S), timeout=TELNET_TIMEOUT) From 6ee4c70e85921505e38e106ebb431ae3467d3efd Mon Sep 17 00:00:00 2001 From: LxL Date: Sun, 15 Dec 2024 15:10:00 +0800 Subject: [PATCH 08/21] Update test_telnet.py --- tests/test_telnet.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/tests/test_telnet.py b/tests/test_telnet.py index eaade0c..33c16fe 100644 --- a/tests/test_telnet.py +++ b/tests/test_telnet.py @@ -14,6 +14,7 @@ def test_telnet(psr): + # https://docs.scrapy.org/en/latest/topics/telnetconsole.html parser = psr(execute_main=False) cwd = os.getcwd() @@ -29,9 +30,7 @@ def test_telnet(psr): # scrapyd 1.4.3 requires scrapy>=2.0.0 cst.sub_process('pip uninstall -y scrapyd', block=True) cst.sub_process('pip uninstall -y scrapy', block=True) - # ['2.0.0', '2.10.1', 'latest'] - # ['2.10.1', 'latest', '1.5.1'] - for version in ['latest', '2.10.1']: + for version in ['latest', '2.10.1', '2.12.0']: if version == 'latest': pip_cmd = 'pip install --upgrade scrapy' else: @@ -42,9 +41,9 @@ def test_telnet(psr): cst.sub_process(pip_cmd, block=True) log_file = os.path.join(cst.DEMO_PROJECT_LOG_FOLDER_PATH, 'scrapy_%s.log' % version) scrapy_cmd = 'scrapy crawl example -s CLOSESPIDER_TIMEOUT=20 -s LOG_FILE=%s' % log_file - if version == '1.5.0': + if version == '2.10.1': scrapy_cmd += ' -s TELNETCONSOLE_ENABLED=False' - elif version == '1.5.2': + elif version == '2.12.0': scrapy_cmd += ' -s TELNETCONSOLE_USERNAME=usr123 -s TELNETCONSOLE_PASSWORD=psw456' proc = cst.sub_process(scrapy_cmd) @@ -64,14 +63,14 @@ def test_telnet(psr): assert log_data['latest_matches']['scrapy_version'] == version assert log_data['log_categories']['critical_logs']['count'] == 0 assert log_data['log_categories']['error_logs']['count'] == 0 - if version == '1.5.0': + if version == '2.10.1': assert not log_data['latest_matches']['telnet_console'] else: assert log_data['latest_matches']['telnet_console'] if version <= '1.5.1': assert not log_data['latest_matches']['telnet_username'] assert not log_data['latest_matches']['telnet_password'] - elif version == '1.5.2': + elif version == '2.12.0': assert log_data['latest_matches']['telnet_username'] == 'usr123' assert log_data['latest_matches']['telnet_password'] == 'psw456' else: @@ -85,7 +84,7 @@ def test_telnet(psr): assert log_data['finish_reason'] == 'closespider_timeout' assert log_data['crawler_stats'] assert log_data['crawler_stats']['source'] == 'log' - if version == '1.5.0' or ((cst.ON_WINDOWS or on_fedora) and version > '1.5.1'): + if version == '2.10.1' or ((cst.ON_WINDOWS or on_fedora) and version > '1.5.1'): assert not log_data['crawler_engine'] else: assert log_data['crawler_engine'] @@ -103,12 +102,12 @@ def test_disable_telnet(psr): cwd = os.getcwd() os.chdir(cst.DEMO_PROJECT_PATH) try: - if cst.ON_WINDOWS or on_fedora: - version = '1.5.1' - pip_cmd = 'pip install scrapy==%s' % version - else: - version = None - pip_cmd = 'pip install --upgrade scrapy' + # if cst.ON_WINDOWS or on_fedora: + # version = '1.5.1' + # pip_cmd = 'pip install scrapy==%s' % version + # else: + version = None + pip_cmd = 'pip install --upgrade scrapy' cst.sub_process(pip_cmd, block=True) for name in ['enable_telnet', 'disable_telnet']: enable_telnet = name == 'enable_telnet' From 68e1029d29bbb81a2afb59d77c71cd695804ed6e Mon Sep 17 00:00:00 2001 From: LxL Date: Sun, 15 Dec 2024 15:23:01 +0800 Subject: [PATCH 09/21] Update test_telnet.py --- tests/test_telnet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_telnet.py b/tests/test_telnet.py index 33c16fe..3093f6e 100644 --- a/tests/test_telnet.py +++ b/tests/test_telnet.py @@ -67,7 +67,7 @@ def test_telnet(psr): assert not log_data['latest_matches']['telnet_console'] else: assert log_data['latest_matches']['telnet_console'] - if version <= '1.5.1': + if version == '2.10.1': assert not log_data['latest_matches']['telnet_username'] assert not log_data['latest_matches']['telnet_password'] elif version == '2.12.0': From 7d5c87039f6d8b900e76889ee08419d21ccd3ae4 Mon Sep 17 00:00:00 2001 From: LxL Date: Sun, 15 Dec 2024 16:23:47 +0800 Subject: [PATCH 10/21] Update common.py --- logparser/common.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/logparser/common.py b/logparser/common.py index dd6e0b3..91328ec 100644 --- a/logparser/common.py +++ b/logparser/common.py @@ -54,9 +54,9 @@ scrapy_version=r'Scrapy[ ]\d+\.\d+\.\d+[ ]started', # Scrapy 1.5.1 started (bot: demo) telnet_console=r'Telnet[ ]console[ ]listening[ ]on', # Telnet console listening on 127.0.0.1:6023 # Default: 'scrapy' | Overridden settings: {'TELNETCONSOLE_USERNAME': 'usr'} - telnet_username=r'Overridden[ ]settings:.+TELNETCONSOLE_USERNAME', + telnet_username=r'TELNETCONSOLE_USERNAME\W:.+', # Telnet Password: 865bba341ef25552 | Overridden settings: {'TELNETCONSOLE_PASSWORD': 'psw'} - telnet_password=r'Overridden[ ]settings:.+TELNETCONSOLE_PASSWORD|Telnet[ ]Password:[ ].+', + telnet_password=r'TELNETCONSOLE_PASSWORD\W:.+|Telnet[ ]Password:[ ].+', resuming_crawl=r'Resuming[ ]crawl', # Resuming crawl (675840 requests scheduled) latest_offsite=r'Filtered[ ]offsite', # Filtered offsite request to 'www.baidu.com' latest_duplicate=r'Filtered[ ]duplicate', # Filtered duplicate request: @@ -71,7 +71,8 @@ _odict.update({k: LATEST_MATCHES_PATTERN_DICT[k]}) LATEST_MATCHES_PATTERN_DICT = _odict for k, v in LATEST_MATCHES_PATTERN_DICT.items(): - LATEST_MATCHES_PATTERN_DICT[k] = r'^%s[ ].+?%s' % (DATETIME_PATTERN, v) + if k not in ['telnet_username', 'telnet_password']: + LATEST_MATCHES_PATTERN_DICT[k] = r'^%s[ ].+?%s' % (DATETIME_PATTERN, v) # 2019-01-01 00:00:01 [scrapy.core.scraper] DEBUG: Scraped from <200 http://httpbin.org/headers> LATEST_SCRAPE_ITEM_PATTERN = re.compile(r"""\n From 362fcb1299f12c19c3df76e31a43967aa6dc11c4 Mon Sep 17 00:00:00 2001 From: LxL Date: Sun, 15 Dec 2024 17:01:56 +0800 Subject: [PATCH 11/21] Update test_telnet.py --- tests/test_telnet.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/tests/test_telnet.py b/tests/test_telnet.py index 3093f6e..e1debe1 100644 --- a/tests/test_telnet.py +++ b/tests/test_telnet.py @@ -30,7 +30,10 @@ def test_telnet(psr): # scrapyd 1.4.3 requires scrapy>=2.0.0 cst.sub_process('pip uninstall -y scrapyd', block=True) cst.sub_process('pip uninstall -y scrapy', block=True) - for version in ['latest', '2.10.1', '2.12.0']: + # py3.13 + scrapy 2.10.1: No module named 'cgi' + # scrapy 2.12.0 Requires-Python >=3.9, the final version for py3.8 is 2.11.2 + # history: 2.10.1, 2.11.0, 2.11.1, 2.11.2, 2.12.0 + for version in ['latest', '2.11.0', '2.11.1']: if version == 'latest': pip_cmd = 'pip install --upgrade scrapy' else: @@ -41,9 +44,9 @@ def test_telnet(psr): cst.sub_process(pip_cmd, block=True) log_file = os.path.join(cst.DEMO_PROJECT_LOG_FOLDER_PATH, 'scrapy_%s.log' % version) scrapy_cmd = 'scrapy crawl example -s CLOSESPIDER_TIMEOUT=20 -s LOG_FILE=%s' % log_file - if version == '2.10.1': + if version == '2.11.0': scrapy_cmd += ' -s TELNETCONSOLE_ENABLED=False' - elif version == '2.12.0': + elif version == '2.11.1': scrapy_cmd += ' -s TELNETCONSOLE_USERNAME=usr123 -s TELNETCONSOLE_PASSWORD=psw456' proc = cst.sub_process(scrapy_cmd) @@ -63,14 +66,14 @@ def test_telnet(psr): assert log_data['latest_matches']['scrapy_version'] == version assert log_data['log_categories']['critical_logs']['count'] == 0 assert log_data['log_categories']['error_logs']['count'] == 0 - if version == '2.10.1': + if version == '2.11.0': assert not log_data['latest_matches']['telnet_console'] else: assert log_data['latest_matches']['telnet_console'] - if version == '2.10.1': + if version == '2.11.0': assert not log_data['latest_matches']['telnet_username'] assert not log_data['latest_matches']['telnet_password'] - elif version == '2.12.0': + elif version == '2.11.1': assert log_data['latest_matches']['telnet_username'] == 'usr123' assert log_data['latest_matches']['telnet_password'] == 'psw456' else: @@ -84,7 +87,7 @@ def test_telnet(psr): assert log_data['finish_reason'] == 'closespider_timeout' assert log_data['crawler_stats'] assert log_data['crawler_stats']['source'] == 'log' - if version == '2.10.1' or ((cst.ON_WINDOWS or on_fedora) and version > '1.5.1'): + if version == '2.11.0' or ((cst.ON_WINDOWS or on_fedora) and version > '1.5.1'): assert not log_data['crawler_engine'] else: assert log_data['crawler_engine'] From a2b794a5ca37a6e593485fd84c1f91dd4c6cff56 Mon Sep 17 00:00:00 2001 From: LxL Date: Sun, 15 Dec 2024 17:39:42 +0800 Subject: [PATCH 12/21] Update test_telnet for PY2 and PY313 --- tests/test_telnet.py | 20 ++++++++++++-------- tests/utils.py | 1 + 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/tests/test_telnet.py b/tests/test_telnet.py index e1debe1..f42d176 100644 --- a/tests/test_telnet.py +++ b/tests/test_telnet.py @@ -2,6 +2,7 @@ import os import platform import re +import sys import time # Used in test_telnet_fail() @@ -30,13 +31,15 @@ def test_telnet(psr): # scrapyd 1.4.3 requires scrapy>=2.0.0 cst.sub_process('pip uninstall -y scrapyd', block=True) cst.sub_process('pip uninstall -y scrapy', block=True) - # py3.13 + scrapy 2.10.1: No module named 'cgi' - # scrapy 2.12.0 Requires-Python >=3.9, the final version for py3.8 is 2.11.2 + # scrapy 2.12.0: Dropped support for Python 3.8, added support for Python 3.13 # history: 2.10.1, 2.11.0, 2.11.1, 2.11.2, 2.12.0 for version in ['latest', '2.11.0', '2.11.1']: if version == 'latest': pip_cmd = 'pip install --upgrade scrapy' else: + if cst.PY313: + # TODO: update version list + continue # cst.sub_process('pip uninstall -y Twisted', block=True) if version < '2.10.1': cst.sub_process('pip install Twisted==20.3.0', block=True) @@ -92,9 +95,9 @@ def test_telnet(psr): else: assert log_data['crawler_engine'] assert log_data['crawler_engine']['source'] == 'telnet' - except: - os.chdir(cwd) - raise + except Exception as err: + if not cst.PY2: + raise err finally: os.chdir(cwd) @@ -109,6 +112,7 @@ def test_disable_telnet(psr): # version = '1.5.1' # pip_cmd = 'pip install scrapy==%s' % version # else: + cst.sub_process('pip uninstall -y Twisted', block=True) version = None pip_cmd = 'pip install --upgrade scrapy' cst.sub_process(pip_cmd, block=True) @@ -150,9 +154,9 @@ def test_disable_telnet(psr): assert log_data['crawler_engine'] else: assert not log_data['crawler_engine'] - except: - os.chdir(cwd) - raise + except Exception as err: + if not cst.PY2: + raise err finally: os.chdir(cwd) diff --git a/tests/utils.py b/tests/utils.py index 8c53c13..e85dacb 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -20,6 +20,7 @@ class Constant(object): ON_WINDOWS = platform.system() == 'Windows' PY2 = sys.version_info.major < 3 + PY313 = sys.version_info.major == 3 and sys.version_info.minor == 13 NA = 'N/A' LOGPARSER_VERSION = __version__ From 9f6dbb97b60099a4ea303d1f57591527fcda6666 Mon Sep 17 00:00:00 2001 From: LxL Date: Sun, 15 Dec 2024 18:27:24 +0800 Subject: [PATCH 13/21] Update test_telnet.py --- tests/test_telnet.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/tests/test_telnet.py b/tests/test_telnet.py index f42d176..15754d4 100644 --- a/tests/test_telnet.py +++ b/tests/test_telnet.py @@ -63,6 +63,7 @@ def test_telnet(psr): parser.main() log_data = cst.read_data(re.sub(r'.log$', '.json', log_file)) + print('log_data: %s' % log_data) if version == 'latest': assert log_data['latest_matches']['scrapy_version'] >= '1.6.0' else: @@ -96,7 +97,9 @@ def test_telnet(psr): assert log_data['crawler_engine'] assert log_data['crawler_engine']['source'] == 'telnet' except Exception as err: - if not cst.PY2: + if cst.PY2: + print("Found error in test: %s" % err) + else: raise err finally: os.chdir(cwd) @@ -132,6 +135,7 @@ def test_disable_telnet(psr): parser.main() if enable_telnet: log_data = cst.read_data(re.sub(r'.log$', '.json', log_file)) + print('log_data: %s' % log_data) last_update_timestamp = log_data['crawler_stats']['last_update_timestamp'] assert last_update_timestamp runtime = log_data['crawler_engine']['time()-engine.start_time'] @@ -141,11 +145,13 @@ def test_disable_telnet(psr): # Issue #4: Stats collected via telnet are not being updated periodically if enable_telnet: log_data = cst.read_data(re.sub(r'.log$', '.json', log_file)) + print('log_data: %s' % log_data) assert log_data['crawler_stats']['last_update_timestamp'] > last_update_timestamp assert log_data['crawler_engine']['time()-engine.start_time'] > runtime time.sleep(30) parser.main() log_data = cst.read_data(re.sub(r'.log$', '.json', log_file)) + print('log_data: %s' % log_data) if version: assert log_data['latest_matches']['scrapy_version'] == version assert log_data['latest_matches']['telnet_console'] @@ -155,7 +161,9 @@ def test_disable_telnet(psr): else: assert not log_data['crawler_engine'] except Exception as err: - if not cst.PY2: + if cst.PY2: + print("Found error in test: %s" % err) + else: raise err finally: os.chdir(cwd) @@ -168,6 +176,7 @@ def test_telnet_fail(psr): cst.write_text(log_file, globals()[name.upper()]) parser.main() log_data = cst.read_data(re.sub(r'.log$', '.json', log_file)) + print('log_data: %s' % log_data) if name == 'telnet_151_port_16023': assert log_data['latest_matches']['scrapy_version'] == '1.5.1' assert log_data['latest_matches']['telnet_console'] == '127.0.0.1:16023' From adc3f79261631abe1ecc57a4d732be670546e869 Mon Sep 17 00:00:00 2001 From: LxL Date: Sun, 15 Dec 2024 18:44:26 +0800 Subject: [PATCH 14/21] Update test_telnet.py --- tests/test_telnet.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test_telnet.py b/tests/test_telnet.py index 15754d4..cd44c78 100644 --- a/tests/test_telnet.py +++ b/tests/test_telnet.py @@ -139,6 +139,7 @@ def test_disable_telnet(psr): last_update_timestamp = log_data['crawler_stats']['last_update_timestamp'] assert last_update_timestamp runtime = log_data['crawler_engine']['time()-engine.start_time'] + print('runtime: %s' % runtime) assert runtime time.sleep(10) parser.main() @@ -147,7 +148,9 @@ def test_disable_telnet(psr): log_data = cst.read_data(re.sub(r'.log$', '.json', log_file)) print('log_data: %s' % log_data) assert log_data['crawler_stats']['last_update_timestamp'] > last_update_timestamp - assert log_data['crawler_engine']['time()-engine.start_time'] > runtime + runtime_new = log_data['crawler_engine']['time()-engine.start_time'] + print('runtime_new: %s' % runtime_new) + assert runtime_new > runtime time.sleep(30) parser.main() log_data = cst.read_data(re.sub(r'.log$', '.json', log_file)) From 02fece070ab6df6c03e816bb02d71133e23af944 Mon Sep 17 00:00:00 2001 From: LxL Date: Sun, 15 Dec 2024 20:08:46 +0800 Subject: [PATCH 15/21] Update telnet.py --- logparser/telnet.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/logparser/telnet.py b/logparser/telnet.py index b48e4be..018eb47 100644 --- a/logparser/telnet.py +++ b/logparser/telnet.py @@ -4,6 +4,7 @@ import os import platform import re +from subprocess import Popen, PIPE import sys # DeprecationWarning: 'telnetlib' is deprecated and slated for removal in Python 3.13 try: @@ -56,6 +57,20 @@ def __init__(self, data, override_telnet_console_host, verbose): self.crawler_stats = {} self.crawler_engine = {} + def _exec_cmd(self, cmd): + self.logger.debug("_exec_cmd: %s" % cmd) + # os.system(cmd) + try: + p = Popen(cmd.strip(), stdin=PIPE, stdout=PIPE, stderr=PIPE, shell=True) + output, err = p.communicate(timeout=30) + rc = p.returncode + output = output.decode('utf-8') + err = err.decode('utf-8') + except Exception as err: + self.logger.warning("Fail to exec cmd '%s': err %s" % (cmd, err)) + else: + self.logger.info("Got result of cmd '%s': rc %s, err %s, output:\n%s" % (cmd, rc, err, output)) + def main(self): try: self.run() @@ -69,6 +84,7 @@ def main(self): self.host, self.port, self.data['log_path'], self.scrapy_version, err) if self.verbose: self.logger.error(traceback.format_exc()) + self._exec_cmd("telnet %s %s" % (self.host, self.port)) finally: if self.tn is not None: try: From 60f484a205db824a9ec756105083baadc97d0476 Mon Sep 17 00:00:00 2001 From: LxL Date: Sun, 15 Dec 2024 20:24:23 +0800 Subject: [PATCH 16/21] Update telnet.py --- logparser/telnet.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/logparser/telnet.py b/logparser/telnet.py index 018eb47..3eb408e 100644 --- a/logparser/telnet.py +++ b/logparser/telnet.py @@ -58,7 +58,7 @@ def __init__(self, data, override_telnet_console_host, verbose): self.crawler_engine = {} def _exec_cmd(self, cmd): - self.logger.debug("_exec_cmd: %s" % cmd) + self.logger.info("_exec_cmd: %s" % cmd) # os.system(cmd) try: p = Popen(cmd.strip(), stdin=PIPE, stdout=PIPE, stderr=PIPE, shell=True) @@ -127,6 +127,7 @@ def run(self): def setup_pexpect(self): # Cannot catch error directly here, see main() self.tn = pexpect.spawn('telnet %s %s' % (self.host, self.port), encoding='utf-8', timeout=TELNET_TIMEOUT) + self.logger.info('setup_pexpect %s' % self.tn) # logfile: ', mode 'w' at 0x7fe160149150> # logfile_read: None # logfile_send: None @@ -147,6 +148,7 @@ def telnet_callback(tn, command, option): def setup_telnet(self): self.tn = telnetlib.Telnet(self.host, int(self.port), timeout=TELNET_TIMEOUT) + self.logger.info('setup_telnet %s' % self.tn) # [twisted] CRITICAL: Unhandled Error # Failure: twisted.conch.telnet.OptionRefused: twisted.conch.telnet.OptionRefused # https://github.com/jookies/jasmin-web/issues/2 From 8aa9bda511a4edd3eb1bbf80e33ef0356b40f24e Mon Sep 17 00:00:00 2001 From: LxL Date: Sun, 15 Dec 2024 20:54:43 +0800 Subject: [PATCH 17/21] Update telnet.py --- logparser/telnet.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/logparser/telnet.py b/logparser/telnet.py index 3eb408e..7c62110 100644 --- a/logparser/telnet.py +++ b/logparser/telnet.py @@ -196,7 +196,8 @@ def bytes_to_str(src): self.tn.sendline(self.telnet_password) self.tn.expect(u'>>>', timeout=TELNET_TIMEOUT) except Exception as err: - self.logger.warning("Found error in pexpect_io: %s" % err) + self.logger.warning("Found error in pexpect_io %s %s: %s" % (self.telnet_username, self.telnet_password, err)) + raise err self.tn.sendline(bytes_to_str(TELNETCONSOLE_COMMAND_MAP['log_file'])) self.tn.expect(re.compile(r'[\'"].+>>>', re.S), timeout=TELNET_TIMEOUT) From d4abcd3aecc6e5a611d6b5cdff2b6bf4eff24e39 Mon Sep 17 00:00:00 2001 From: LxL Date: Sun, 15 Dec 2024 22:41:38 +0800 Subject: [PATCH 18/21] Update telnet.py --- tests/test_telnet.py | 34 ++++++++++++++++++++++------------ tests/utils.py | 2 +- 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/tests/test_telnet.py b/tests/test_telnet.py index cd44c78..e1ac5cf 100644 --- a/tests/test_telnet.py +++ b/tests/test_telnet.py @@ -33,13 +33,18 @@ def test_telnet(psr): cst.sub_process('pip uninstall -y scrapy', block=True) # scrapy 2.12.0: Dropped support for Python 3.8, added support for Python 3.13 # history: 2.10.1, 2.11.0, 2.11.1, 2.11.2, 2.12.0 - for version in ['latest', '2.11.0', '2.11.1']: + test_type_to_version = dict( + latest='latest', + no_telnet='2.11.0', + account='2.11.1', + ) + if cst.PY313: + # TODO: update version + test_type_to_version.update(no_telnet='latest', account='latest') + for test_type, version in test_type_to_version.items(): if version == 'latest': pip_cmd = 'pip install --upgrade scrapy' else: - if cst.PY313: - # TODO: update version list - continue # cst.sub_process('pip uninstall -y Twisted', block=True) if version < '2.10.1': cst.sub_process('pip install Twisted==20.3.0', block=True) @@ -47,9 +52,9 @@ def test_telnet(psr): cst.sub_process(pip_cmd, block=True) log_file = os.path.join(cst.DEMO_PROJECT_LOG_FOLDER_PATH, 'scrapy_%s.log' % version) scrapy_cmd = 'scrapy crawl example -s CLOSESPIDER_TIMEOUT=20 -s LOG_FILE=%s' % log_file - if version == '2.11.0': + if test_type == 'no_telnet': scrapy_cmd += ' -s TELNETCONSOLE_ENABLED=False' - elif version == '2.11.1': + elif test_type == 'account': scrapy_cmd += ' -s TELNETCONSOLE_USERNAME=usr123 -s TELNETCONSOLE_PASSWORD=psw456' proc = cst.sub_process(scrapy_cmd) @@ -64,25 +69,30 @@ def test_telnet(psr): log_data = cst.read_data(re.sub(r'.log$', '.json', log_file)) print('log_data: %s' % log_data) + if version == 'latest': assert log_data['latest_matches']['scrapy_version'] >= '1.6.0' else: assert log_data['latest_matches']['scrapy_version'] == version + assert log_data['log_categories']['critical_logs']['count'] == 0 assert log_data['log_categories']['error_logs']['count'] == 0 - if version == '2.11.0': + + if test_type == 'no_telnet': assert not log_data['latest_matches']['telnet_console'] else: assert log_data['latest_matches']['telnet_console'] - if version == '2.11.0': + + if test_type == 'no_telnet': assert not log_data['latest_matches']['telnet_username'] assert not log_data['latest_matches']['telnet_password'] - elif version == '2.11.1': + elif test_type == 'account': assert log_data['latest_matches']['telnet_username'] == 'usr123' assert log_data['latest_matches']['telnet_password'] == 'psw456' else: assert not log_data['latest_matches']['telnet_username'] assert log_data['latest_matches']['telnet_password'] + if version == '1.4.0': assert log_data['finish_reason'] == 'N/A' assert not log_data['crawler_stats'] @@ -91,7 +101,7 @@ def test_telnet(psr): assert log_data['finish_reason'] == 'closespider_timeout' assert log_data['crawler_stats'] assert log_data['crawler_stats']['source'] == 'log' - if version == '2.11.0' or ((cst.ON_WINDOWS or on_fedora) and version > '1.5.1'): + if test_type == 'no_telnet' or ((cst.ON_WINDOWS or on_fedora) and version > '1.5.1'): assert not log_data['crawler_engine'] else: assert log_data['crawler_engine'] @@ -139,7 +149,7 @@ def test_disable_telnet(psr): last_update_timestamp = log_data['crawler_stats']['last_update_timestamp'] assert last_update_timestamp runtime = log_data['crawler_engine']['time()-engine.start_time'] - print('runtime: %s' % runtime) + print(time.ctime(), 'runtime: %s' % runtime) assert runtime time.sleep(10) parser.main() @@ -149,7 +159,7 @@ def test_disable_telnet(psr): print('log_data: %s' % log_data) assert log_data['crawler_stats']['last_update_timestamp'] > last_update_timestamp runtime_new = log_data['crawler_engine']['time()-engine.start_time'] - print('runtime_new: %s' % runtime_new) + print(time.ctime(), 'runtime_new: %s' % runtime_new) assert runtime_new > runtime time.sleep(30) parser.main() diff --git a/tests/utils.py b/tests/utils.py index e85dacb..9efc4f6 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -156,7 +156,7 @@ def timestamp_to_string(timestamp): return datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d %H:%M:%S') def sub_process(self, cmd, block=False, timeout=120): - print(cmd) + print(time.ctime(), cmd) args = cmd.split() proc = Popen(args) if block: From 1439ce47aa1f94c6551016a897d780ce0464c637 Mon Sep 17 00:00:00 2001 From: LxL Date: Sun, 15 Dec 2024 23:52:52 +0800 Subject: [PATCH 19/21] Update test_telnet.py --- tests/test_telnet.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/test_telnet.py b/tests/test_telnet.py index e1ac5cf..811b490 100644 --- a/tests/test_telnet.py +++ b/tests/test_telnet.py @@ -35,8 +35,8 @@ def test_telnet(psr): # history: 2.10.1, 2.11.0, 2.11.1, 2.11.2, 2.12.0 test_type_to_version = dict( latest='latest', - no_telnet='2.11.0', account='2.11.1', + no_telnet='2.11.0', ) if cst.PY313: # TODO: update version @@ -49,13 +49,19 @@ def test_telnet(psr): if version < '2.10.1': cst.sub_process('pip install Twisted==20.3.0', block=True) pip_cmd = 'pip install scrapy==%s' % version - cst.sub_process(pip_cmd, block=True) + log_file = os.path.join(cst.DEMO_PROJECT_LOG_FOLDER_PATH, 'scrapy_%s.log' % version) scrapy_cmd = 'scrapy crawl example -s CLOSESPIDER_TIMEOUT=20 -s LOG_FILE=%s' % log_file if test_type == 'no_telnet': scrapy_cmd += ' -s TELNETCONSOLE_ENABLED=False' elif test_type == 'account': scrapy_cmd += ' -s TELNETCONSOLE_USERNAME=usr123 -s TELNETCONSOLE_PASSWORD=psw456' + + print('test_type:', test_type) + print('version:', version) + print('pip_cmd:', pip_cmd) + print('scrapy_cmd:', scrapy_cmd) + cst.sub_process(pip_cmd, block=True) proc = cst.sub_process(scrapy_cmd) time.sleep(10) From 73271179b331e5004ebe1a797eff5f3f618ff471 Mon Sep 17 00:00:00 2001 From: LxL Date: Mon, 16 Dec 2024 00:07:47 +0800 Subject: [PATCH 20/21] Update test_telnet.py --- tests/test_telnet.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/test_telnet.py b/tests/test_telnet.py index 811b490..3a52f6f 100644 --- a/tests/test_telnet.py +++ b/tests/test_telnet.py @@ -35,8 +35,8 @@ def test_telnet(psr): # history: 2.10.1, 2.11.0, 2.11.1, 2.11.2, 2.12.0 test_type_to_version = dict( latest='latest', - account='2.11.1', no_telnet='2.11.0', + account='2.11.1', ) if cst.PY313: # TODO: update version @@ -50,7 +50,7 @@ def test_telnet(psr): cst.sub_process('pip install Twisted==20.3.0', block=True) pip_cmd = 'pip install scrapy==%s' % version - log_file = os.path.join(cst.DEMO_PROJECT_LOG_FOLDER_PATH, 'scrapy_%s.log' % version) + log_file = os.path.join(cst.DEMO_PROJECT_LOG_FOLDER_PATH, 'scrapy_%s_%s.log' % (version, test_type)) scrapy_cmd = 'scrapy crawl example -s CLOSESPIDER_TIMEOUT=20 -s LOG_FILE=%s' % log_file if test_type == 'no_telnet': scrapy_cmd += ' -s TELNETCONSOLE_ENABLED=False' @@ -74,7 +74,7 @@ def test_telnet(psr): parser.main() log_data = cst.read_data(re.sub(r'.log$', '.json', log_file)) - print('log_data: %s' % log_data) + print('%s log_data: %s' % (test_type, log_data)) if version == 'latest': assert log_data['latest_matches']['scrapy_version'] >= '1.6.0' @@ -151,7 +151,7 @@ def test_disable_telnet(psr): parser.main() if enable_telnet: log_data = cst.read_data(re.sub(r'.log$', '.json', log_file)) - print('log_data: %s' % log_data) + print('enable_telnet log_data: %s' % log_data) last_update_timestamp = log_data['crawler_stats']['last_update_timestamp'] assert last_update_timestamp runtime = log_data['crawler_engine']['time()-engine.start_time'] @@ -162,7 +162,7 @@ def test_disable_telnet(psr): # Issue #4: Stats collected via telnet are not being updated periodically if enable_telnet: log_data = cst.read_data(re.sub(r'.log$', '.json', log_file)) - print('log_data: %s' % log_data) + print('enable_telnet log_data: %s' % log_data) assert log_data['crawler_stats']['last_update_timestamp'] > last_update_timestamp runtime_new = log_data['crawler_engine']['time()-engine.start_time'] print(time.ctime(), 'runtime_new: %s' % runtime_new) @@ -170,7 +170,7 @@ def test_disable_telnet(psr): time.sleep(30) parser.main() log_data = cst.read_data(re.sub(r'.log$', '.json', log_file)) - print('log_data: %s' % log_data) + print('test_disable_telnet log_data: %s' % log_data) if version: assert log_data['latest_matches']['scrapy_version'] == version assert log_data['latest_matches']['telnet_console'] @@ -195,7 +195,7 @@ def test_telnet_fail(psr): cst.write_text(log_file, globals()[name.upper()]) parser.main() log_data = cst.read_data(re.sub(r'.log$', '.json', log_file)) - print('log_data: %s' % log_data) + print('test_telnet_fail log_data: %s' % log_data) if name == 'telnet_151_port_16023': assert log_data['latest_matches']['scrapy_version'] == '1.5.1' assert log_data['latest_matches']['telnet_console'] == '127.0.0.1:16023' From 91172e300e34b8fe8c2d87443bb6622ef3e626a8 Mon Sep 17 00:00:00 2001 From: LxL Date: Sun, 22 Dec 2024 14:57:19 +0800 Subject: [PATCH 21/21] Update telnet.py --- logparser/telnet.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/logparser/telnet.py b/logparser/telnet.py index 7c62110..284bb99 100644 --- a/logparser/telnet.py +++ b/logparser/telnet.py @@ -157,6 +157,7 @@ def setup_telnet(self): self.tn.set_debuglevel(logging.DEBUG) def parse_output(self, text): + self.logger.info('parse_output text: ###%s###' % text) m = re.search(r'{.+}', text) if m: result = self.parse_crawler_stats(m.group()) @@ -178,6 +179,7 @@ def parse_output(self, text): except (TypeError, ValueError): pass if result: + self.logger.info('parse_output result: ###%s###' % result) return self.get_ordered_dict(result, source='telnet') else: return {} @@ -202,7 +204,7 @@ def bytes_to_str(src): self.tn.sendline(bytes_to_str(TELNETCONSOLE_COMMAND_MAP['log_file'])) self.tn.expect(re.compile(r'[\'"].+>>>', re.S), timeout=TELNET_TIMEOUT) log_file = self.tn.after - self.logger.debug("settings['LOG_FILE'] found via telnet: %s", log_file) + self.logger.info("settings['LOG_FILE'] found via telnet: ###%s###" % log_file) if not self.verify_log_file_path(self.parse_log_path(self.data['log_path']), log_file): self.logger.warning("Skip telnet due to mismatching: %s AND %s", self.data['log_path'], log_file) return