From 612c895ec20023d08381426bed8239ddeea95e41 Mon Sep 17 00:00:00 2001 From: Hetang Modi Date: Wed, 16 Feb 2022 20:38:24 +0530 Subject: [PATCH 1/2] ci(semgrep): fix semgrep and lint failures --- .semgrepignore | 13 +++++++++++ cloudconnectlib/common/util.py | 2 +- cloudconnectlib/configuration/loader.py | 7 ++++-- cloudconnectlib/core/engine.py | 10 ++++++-- cloudconnectlib/core/http.py | 5 +++- cloudconnectlib/core/plugin.py | 6 ++++- cloudconnectlib/core/task.py | 18 +++++++++++---- cloudconnectlib/core/template.py | 5 +++- .../splunktacollectorlib/common/__init__.py | 2 +- .../splunktacollectorlib/config.py | 23 +++++++++++++------ .../data_collection/ta_data_collector.py | 13 +++++++---- .../data_collection/ta_data_loader.py | 2 +- .../data_collection/ta_helper.py | 10 +++++--- .../data_collection/ta_mod_input.py | 14 ++++++----- .../splunk_ta_import_declare.py | 2 +- 15 files changed, 97 insertions(+), 35 deletions(-) create mode 100644 .semgrepignore diff --git a/.semgrepignore b/.semgrepignore new file mode 100644 index 0000000..85c66b9 --- /dev/null +++ b/.semgrepignore @@ -0,0 +1,13 @@ +# Ignore git items +.gitignore +.git/ +:include .gitignore + +# Test cases, examples and workflows +tests/ +.github/ +examples/ +tool/ + +# For removed files +deprecated_files/ \ No newline at end of file diff --git a/cloudconnectlib/common/util.py b/cloudconnectlib/common/util.py index 1e188ae..e1f9534 100755 --- a/cloudconnectlib/common/util.py +++ b/cloudconnectlib/common/util.py @@ -48,7 +48,7 @@ def load_json_file(file_path): :param file_path: JSON file path. :return: A `dict` object. 
""" - with open(file_path) as file_pointer: + with open(file_path, encoding="UTF-8") as file_pointer: return json.load(file_pointer) diff --git a/cloudconnectlib/configuration/loader.py b/cloudconnectlib/configuration/loader.py index c9ac7a6..6ea3230 100755 --- a/cloudconnectlib/configuration/loader.py +++ b/cloudconnectlib/configuration/loader.py @@ -64,7 +64,7 @@ def _get_schema_from_file(schema_file): """ try: return load_json_file(schema_file) - except: + except Exception: raise ConfigException( "Cannot load schema from file {}: {}".format( schema_file, traceback.format_exc() @@ -236,7 +236,10 @@ def _load_processor(self, processor): pipeline = self._parse_tasks(processor.get("pipeline", [])) return Processor(skip_conditions=skip_conditions, pipeline=pipeline) - def _load_request(self, request): + # nosemgrep reason - false positive, `request` is a dict here + def _load_request( # nosemgrep: python.django.security.audit.django-ratelimit.missing-ratelimit.missing-ratelimit + self, request + ): options = self._load_options(request["request"]) pre_process = self._load_processor(request.get("pre_process", {})) diff --git a/cloudconnectlib/core/engine.py b/cloudconnectlib/core/engine.py index 4197451..21eaf38 100755 --- a/cloudconnectlib/core/engine.py +++ b/cloudconnectlib/core/engine.py @@ -92,7 +92,10 @@ class Job: reached it's stop condition. """ - def __init__(self, request, context, checkpoint_mgr, proxy=None): + # nosemgrep reason - false positive, `request` is a Munch object which has settings for making a request + def __init__( # nosemgrep: python.django.security.audit.django-ratelimit.missing-ratelimit.missing-ratelimit + self, request, context, checkpoint_mgr, proxy=None + ): """ Constructs a `Job` with properties request, context and a optional proxy setting. 
@@ -301,7 +304,10 @@ def _run(self): _logger.info("Stop condition reached, exit job now") break - def _send_request(self, request): + # nosemgrep reason - false positive, `request` is a dict here + def _send_request( # nosemgrep: python.django.security.audit.django-ratelimit.missing-ratelimit.missing-ratelimit + self, request + ): """Do send request with a simple error handling strategy. Refer to https://confluence.splunk.com/display/PROD/CC+1.0+-+Detail+Design""" try: diff --git a/cloudconnectlib/core/http.py b/cloudconnectlib/core/http.py index 0bcbd07..848353f 100755 --- a/cloudconnectlib/core/http.py +++ b/cloudconnectlib/core/http.py @@ -288,7 +288,10 @@ def _initialize_connection(self): _logger.info("Proxy is not enabled for http connection.") self._connection = self._build_http_connection(self._proxy_info) - def send(self, request): + # nosemgrep reason - false positive, `request` is a dict here having configs required for making request + def send( # nosemgrep: python.django.security.audit.django-ratelimit.missing-ratelimit.missing-ratelimit + self, request + ): if not request: raise ValueError("The request is none") if request.body and not isinstance(request.body, str): diff --git a/cloudconnectlib/core/plugin.py b/cloudconnectlib/core/plugin.py index c40451f..6b91d8b 100644 --- a/cloudconnectlib/core/plugin.py +++ b/cloudconnectlib/core/plugin.py @@ -86,7 +86,11 @@ def import_plugin_file(file_name): return try: - importlib.import_module(module_name) + # nosemgrep reason - this function only imports a custom cce_plugin_.py, + # which is provided by an addon and programmatically provided. 
+ importlib.import_module( # nosemgrep: python.lang.security.audit.non-literal-import.non-literal-import + module_name + ) except Exception: logger.warning(f"Failed to load module {module_name}, {traceback.format_exc()}") return diff --git a/cloudconnectlib/core/task.py b/cloudconnectlib/core/task.py index e8246eb..8e81349 100644 --- a/cloudconnectlib/core/task.py +++ b/cloudconnectlib/core/task.py @@ -88,7 +88,10 @@ def render(self, context): class RequestTemplate: - def __init__(self, request): + # nosemgrep reason - false positive, `request` is a dict here having configs required for making request + def __init__( # nosemgrep: python.django.security.audit.django-ratelimit.missing-ratelimit.missing-ratelimit + self, request + ) if not request: raise ValueError("The request is none") url = request.get("url") @@ -295,7 +298,8 @@ def perform(self, context): try: invoke_results = self._process_handler.execute(context) except Exception: - logger.exception("Task=%s encountered exception", self) + # Fixing `python.lang.best-practice.logging-error-without-handling.logging-error-without-handling` + logger.warning("Task=%s encountered exception", self) raise CCESplitError if not invoke_results or not invoke_results.get(CCESplitTask.OUTPUT_KEY): raise CCESplitError @@ -316,7 +320,10 @@ class CCEHTTPRequestTask(BaseTask): from context when executing. """ - def __init__(self, request, name, meta_config=None, task_config=None, **kwargs): + # nosemgrep reason - false positive, `request` is a dict here having configs required for making request + def __init__( + self, request, name, meta_config=None, task_config=None, **kwargs + ): # nosemgrep: python.django.security.audit.django-ratelimit.missing-ratelimit.missing-ratelimit """ :param verify: Absolute path to server certificate, otherwise uses requests' default certificate to verify server's TLS certificate. 
@@ -464,7 +471,10 @@ def _should_exit(self, done_count, context): return True return False - def _send_request(self, request): + # nosemgrep reason - false positive, `request` is a dict here having configs required for making request + def _send_request( + self, request + ): # nosemgrep: python.django.security.audit.django-ratelimit.missing-ratelimit.missing-ratelimit try: response = self._http_client.send(request) except HTTPError as error: diff --git a/cloudconnectlib/core/template.py b/cloudconnectlib/core/template.py index fe8c1d4..90200aa 100755 --- a/cloudconnectlib/core/template.py +++ b/cloudconnectlib/core/template.py @@ -31,6 +31,9 @@ def translate_internal(context): if match: context_var = context.get(match.groups()[0]) return context_var if context_var else "" - return _template.render(context) + # nosemgrep reason - the `context` passed is provided by addon, and it should be sanitized by the addon. + return _template.render( # nosemgrep: python.flask.security.xss.audit.direct-use-of-jinja2.direct-use-of-jinja2 # noqa: E501 - semgrep name is too long + context + ) return translate_internal diff --git a/cloudconnectlib/splunktacollectorlib/common/__init__.py b/cloudconnectlib/splunktacollectorlib/common/__init__.py index 4c58c45..d5634b5 100755 --- a/cloudconnectlib/splunktacollectorlib/common/__init__.py +++ b/cloudconnectlib/splunktacollectorlib/common/__init__.py @@ -22,7 +22,7 @@ def load_schema_file(schema_file): Load schema file. 
""" - with open(schema_file) as f: + with open(schema_file, encoding="UTF-8") as f: ret = json.load(f) common = ret.get("_common_", dict()) diff --git a/cloudconnectlib/splunktacollectorlib/config.py b/cloudconnectlib/splunktacollectorlib/config.py index a88c75c..bce6ba0 100755 --- a/cloudconnectlib/splunktacollectorlib/config.py +++ b/cloudconnectlib/splunktacollectorlib/config.py @@ -149,8 +149,10 @@ def load(self): else: break else: - log(exc, level=logging.ERROR, need_tb=True) - raise exc + # F821 reason - when `ConfigException` would be caught, `exc` would be defined. + # The uncaught exceptions would break the code flow. + log(exc, level=logging.ERROR, need_tb=True) # noqa: F821 + raise exc # noqa: F821 log('"load" method out', level=logging.DEBUG) return ret @@ -185,7 +187,7 @@ def update_items( level=logging.DEBUG, ) - assert ( + assert ( # nosemgrep: gitlab.bandit.B101 - additional check for endpoint_id. Raises AssertionError otherwise endpoint_id in self._endpoints ), "Unexpected endpoint id in given schema - {ep_id}" "".format( ep_id=endpoint_id @@ -314,8 +316,11 @@ def _parse_schema(self, ucc_config_schema): } ) for field in Config.META_FIELDS: - assert field in ucc_config_schema and isinstance( - ucc_config_schema[field], str + assert ( # nosemgrep: gitlab.bandit.B101 - additional check for ucc_config_schema. + field in ucc_config_schema + and isinstance( + ucc_config_schema[field], str + ) ), ('Missing or invalid field "%s" in given schema' % field) setattr(self, field, ucc_config_schema[field]) @@ -324,10 +329,14 @@ def _parse_schema(self, ucc_config_schema): if key.startswith("_"): continue - assert isinstance(val, dict), ( + assert isinstance( # nosemgrep: gitlab.bandit.B101 - additional check `val` type. 
+ val, dict + ), ( 'The schema of endpoint "%s" should be dict' % key ) - assert "endpoint" in val, 'The endpoint "%s" has no endpoint entry' % key + assert "endpoint" in val, ( # nosemgrep: gitlab.bandit.B101 - additional check for endpoint in `val`. + 'The endpoint "%s" has no endpoint entry' % key + ) self._endpoints[key] = val diff --git a/cloudconnectlib/splunktacollectorlib/data_collection/ta_data_collector.py b/cloudconnectlib/splunktacollectorlib/data_collection/ta_data_collector.py index 7d323d6..5e843d3 100755 --- a/cloudconnectlib/splunktacollectorlib/data_collection/ta_data_collector.py +++ b/cloudconnectlib/splunktacollectorlib/data_collection/ta_data_collector.py @@ -15,7 +15,6 @@ # limitations under the License. # import threading -import time from collections import namedtuple from splunktalib.common import util as scu @@ -110,8 +109,12 @@ def _build_event(self, events): events = [events] evts = [] for event in events: - assert event.raw_data, "the raw data of events is empty" - if event.is_unbroken: + assert ( # nosemgrep: gitlab.bandit.B101 - additional check for raw data in a single event. + event.raw_data + ), "the raw data of events is empty" + if ( + event.is_unbroken # nosemgrep: python.lang.maintainability.is-function-without-parentheses.is-function-without-parentheses - false positive, it is a property and not a function. # noqa: E501 - semgrep name is too long + ): evt = unbroken_evt_fmt.format( event.host or "", event.source or "", @@ -119,7 +122,9 @@ def _build_event(self, events): event.time or "", event.index or "", scu.escape_cdata(event.raw_data), - "" if event.is_done else "", + "" + if event.is_done # nosemgrep: python.lang.maintainability.is-function-without-parentheses.is-function-without-parentheses - false positive, it is a property and not a function. 
# noqa: E501 - semgrep name is too long + else "", ) else: evt = evt_fmt.format( diff --git a/cloudconnectlib/splunktacollectorlib/data_collection/ta_data_loader.py b/cloudconnectlib/splunktacollectorlib/data_collection/ta_data_loader.py index 3ad8d5d..dee5f7d 100755 --- a/cloudconnectlib/splunktacollectorlib/data_collection/ta_data_loader.py +++ b/cloudconnectlib/splunktacollectorlib/data_collection/ta_data_loader.py @@ -86,7 +86,7 @@ def _enqueue_io_job(job): def _wait_for_tear_down(self): wakeup_q = self._wakeup_queue while 1: - try: + try: # nosemgrep: gitlab.bandit.B110 -- nothing to do when queue is empty. go_exit = wakeup_q.get(timeout=1) except queue.Empty: pass diff --git a/cloudconnectlib/splunktacollectorlib/data_collection/ta_helper.py b/cloudconnectlib/splunktacollectorlib/data_collection/ta_helper.py index bb9ddfa..0068a96 100755 --- a/cloudconnectlib/splunktacollectorlib/data_collection/ta_helper.py +++ b/cloudconnectlib/splunktacollectorlib/data_collection/ta_helper.py @@ -54,7 +54,9 @@ def get_md5(data): :param data: :return: """ - assert data is not None, "The input cannot be None" + assert ( # nosemgrep: gitlab.bandit.B101 - checking for data to not be None + data is not None + ), "The input cannot be None" if isinstance(data, str): return hashlib.sha256(data.encode("utf-8")).hexdigest() elif isinstance(data, (list, tuple, dict)): @@ -124,10 +126,12 @@ def _load_conf_contents(self): def _process_division(self, division_endpoint, division_contents): division_metrics = [] - assert isinstance(division_contents, dict) + assert isinstance( # nosemgrep: gitlab.bandit.B101 - checking for type of `division_contents` + division_contents, dict + ) for division_key, division_value in division_contents.items(): try: - assert ( + assert ( # nosemgrep: gitlab.bandit.B101 self.TYPE in division_value and division_value[self.TYPE] in [self.TYPE_SINGLE, self.TYPE_MULTI] and self.SEPARATOR in division_value diff --git 
a/cloudconnectlib/splunktacollectorlib/data_collection/ta_mod_input.py b/cloudconnectlib/splunktacollectorlib/data_collection/ta_mod_input.py index 654d7b3..c14d20c 100755 --- a/cloudconnectlib/splunktacollectorlib/data_collection/ta_mod_input.py +++ b/cloudconnectlib/splunktacollectorlib/data_collection/ta_mod_input.py @@ -35,7 +35,7 @@ from . import ta_checkpoint_manager as cpmgr from . import ta_config as tc from . import ta_data_client as tdc -from . import ta_data_loader as dl +from . import ta_data_loader as tdl utils.remove_http_proxy_env_vars() @@ -171,7 +171,7 @@ def run( # http://bugs.python.org/issue7980 time.strptime("2016-01-01", "%Y-%m-%d") - loader = dl.create_data_loader() + loader = tdl.create_data_loader() # handle signal _setup_signal_handler(loader, ta_short_name) @@ -272,8 +272,12 @@ def main( """ Main entry point """ - assert collector_cls, "ucc modinput collector is None." - assert schema_file_path, "ucc modinput schema file is None" + assert ( # nosemgrep: gitlab.bandit.B101 - check for required params to be passed + collector_cls + ), "ucc modinput collector is None." 
+ assert ( # nosemgrep: gitlab.bandit.B101 - check for required params to be passed + schema_file_path + ), "ucc modinput schema file is None" settings = ld(schema_file_path) @@ -289,8 +293,6 @@ def main( ) elif args[1] == "--validate-arguments": sys.exit(validate_config()) - elif args[1] in ("-h", "--h", "--help"): - usage() else: usage() else: diff --git a/cloudconnectlib/splunktacollectorlib/splunk_ta_import_declare.py b/cloudconnectlib/splunktacollectorlib/splunk_ta_import_declare.py index 9aa813c..e6c2703 100755 --- a/cloudconnectlib/splunktacollectorlib/splunk_ta_import_declare.py +++ b/cloudconnectlib/splunktacollectorlib/splunk_ta_import_declare.py @@ -23,7 +23,7 @@ ta_name = os.path.basename(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) ta_lib_name = re.sub(r"[^\w]+", "_", ta_name.lower()) -assert ta_name or ta_name == "package", "TA name is None or package" +assert ta_name or ta_name == "package", "TA name is None or package" # nosemgrep: gitlab.bandit.B101 - check for `ta_name` to be a valid value pattern = re.compile(r"[\\/]etc[\\/]apps[\\/][^\\/]+[\\/]bin[\\/]?$") new_paths = [path for path in sys.path if not pattern.search(path) or ta_name in path] new_paths.insert(0, os.path.sep.join([os.path.dirname(__file__), ta_lib_name])) From fdd3068ac029ada84e9feb00716c69a022a2ae81 Mon Sep 17 00:00:00 2001 From: Hetang Modi Date: Wed, 16 Feb 2022 20:47:56 +0530 Subject: [PATCH 2/2] chore(lint): fixing pre-commit failures --- cloudconnectlib/splunktacollectorlib/config.py | 6 +++++- .../splunktacollectorlib/splunk_ta_import_declare.py | 4 +++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/cloudconnectlib/splunktacollectorlib/config.py b/cloudconnectlib/splunktacollectorlib/config.py index bce6ba0..96c6184 100755 --- a/cloudconnectlib/splunktacollectorlib/config.py +++ b/cloudconnectlib/splunktacollectorlib/config.py @@ -316,12 +316,14 @@ def _parse_schema(self, ucc_config_schema): } ) for field in Config.META_FIELDS: + # fmt: off 
assert ( # nosemgrep: gitlab.bandit.B101 - additional check for ucc_config_schema. field in ucc_config_schema and isinstance( ucc_config_schema[field], str ) ), ('Missing or invalid field "%s" in given schema' % field) + # fmt: on setattr(self, field, ucc_config_schema[field]) self._endpoints = {} @@ -334,7 +336,9 @@ def _parse_schema(self, ucc_config_schema): ), ( 'The schema of endpoint "%s" should be dict' % key ) - assert "endpoint" in val, ( # nosemgrep: gitlab.bandit.B101 - additional check for endpoint in `val`. + assert ( # nosemgrep: gitlab.bandit.B101 - additional check for endpoint in `val`. + "endpoint" in val + ), ( 'The endpoint "%s" has no endpoint entry' % key ) diff --git a/cloudconnectlib/splunktacollectorlib/splunk_ta_import_declare.py b/cloudconnectlib/splunktacollectorlib/splunk_ta_import_declare.py index e6c2703..7735073 100755 --- a/cloudconnectlib/splunktacollectorlib/splunk_ta_import_declare.py +++ b/cloudconnectlib/splunktacollectorlib/splunk_ta_import_declare.py @@ -23,7 +23,9 @@ ta_name = os.path.basename(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) ta_lib_name = re.sub(r"[^\w]+", "_", ta_name.lower()) -assert ta_name or ta_name == "package", "TA name is None or package" # nosemgrep: gitlab.bandit.B101 - check for `ta_name` to be a valid value +assert ( # nosemgrep: gitlab.bandit.B101 - check for `ta_name` to be a valid value + ta_name or ta_name == "package" +), "TA name is None or package" pattern = re.compile(r"[\\/]etc[\\/]apps[\\/][^\\/]+[\\/]bin[\\/]?$") new_paths = [path for path in sys.path if not pattern.search(path) or ta_name in path] new_paths.insert(0, os.path.sep.join([os.path.dirname(__file__), ta_lib_name]))