#!/usr/bin/python
# Copyright: (c) 2018, Rob White (@wimnat)
# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt)

from __future__ import (absolute_import, division, print_function)
__metaclass__ = type

DOCUMENTATION = r'''
---
module: glue_crawler
version_added: 4.1.0
short_description: Manage an AWS Glue crawler
description:
  - Manage an AWS Glue crawler. See U(https://aws.amazon.com/glue/) for details.
  - Prior to release 5.0.0 this module was called C(community.aws.aws_glue_crawler).
    The usage did not change.
author:
  - 'Ivan Chekaldin (@ichekaldin)'
options:
  database_name:
    description:
      - The name of the database where results are written.
    type: str
  description:
    description:
      - Description of the crawler being defined.
    type: str
  name:
    description:
      - The name you assign to this crawler definition. It must be unique in your account.
    required: true
    type: str
  recrawl_policy:
    description:
      - A policy that specifies whether to crawl the entire dataset again, or to crawl only folders
        that were added since the last crawler run.
    suboptions:
      recrawl_behavior:
        description:
          - Specifies whether to crawl the entire dataset again or to crawl only folders that were
            added since the last crawler run.
          - Supported options are C(CRAWL_EVERYTHING) and C(CRAWL_NEW_FOLDERS_ONLY).
        type: str
    type: dict
  role:
    description:
      - The name or ARN of the IAM role associated with this crawler.
      - Required when I(state=present).
    type: str
  schema_change_policy:
    description:
      - The policy for the crawler's update and deletion behavior.
    suboptions:
      delete_behavior:
        description:
          - Defines the deletion behavior when the crawler finds a deleted object.
          - Supported options are C(LOG), C(DELETE_FROM_DATABASE), and C(DEPRECATE_IN_DATABASE).
        type: str
      update_behavior:
        description:
          - Defines the update behavior when the crawler finds a changed schema.
          - Supported options are C(LOG) and C(UPDATE_IN_DATABASE).
        type: str
    type: dict
  state:
    description:
      - Create or delete the AWS Glue crawler.
    required: true
    choices: [ 'present', 'absent' ]
    type: str
  table_prefix:
    description:
      - The table prefix used for catalog tables that are created.
    type: str
  targets:
    description:
      - A list of targets to crawl. See example below.
      - Required when I(state=present).
    type: dict
extends_documentation_fragment:
  - amazon.aws.aws
  - amazon.aws.ec2
  - amazon.aws.boto3
  - amazon.aws.tags
'''

EXAMPLES = r'''
# Note: These examples do not set authentication details, see the AWS Guide for details.

# Create an AWS Glue crawler
- community.aws.glue_crawler:
    name: my-glue-crawler
    database_name: my_database
    role: my-iam-role
    schema_change_policy:
      delete_behavior: DELETE_FROM_DATABASE
      update_behavior: UPDATE_IN_DATABASE
    recrawl_policy:
      recrawl_behavior: CRAWL_EVERYTHING
    targets:
      S3Targets:
        - Path: "s3://my-bucket/prefix/folder/"
          ConnectionName: my-connection
          Exclusions:
            - "**.json"
            - "**.yml"
    state: present

# Delete an AWS Glue crawler
- community.aws.glue_crawler:
    name: my-glue-crawler
    state: absent
'''

RETURN = r'''
creation_time:
    description: The time and date that this crawler definition was created.
    returned: when state is present
    type: str
    sample: '2021-04-01T05:19:58.326000+00:00'
database_name:
    description: The name of the database where results are written.
    returned: when state is present
    type: str
    sample: my_table
description:
    description: Description of the crawler.
    returned: when state is present
    type: str
    sample: My crawler
last_updated:
    description: The time and date that this crawler definition was last updated.
    returned: when state is present
    type: str
    sample: '2021-04-01T05:19:58.326000+00:00'
name:
    description: The name of the AWS Glue crawler.
    returned: always
    type: str
    sample: my-glue-crawler
recrawl_policy:
    description: A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run.
    returned: when state is present
    type: complex
    contains:
        RecrawlBehavior:
            description: Whether to crawl the entire dataset again or to crawl only folders that were added since the last crawler run.
            returned: when state is present
            type: str
            sample: CRAWL_EVERYTHING
role:
    description: The name or ARN of the IAM role associated with this crawler.
    returned: when state is present
    type: str
    sample: my-iam-role
schema_change_policy:
    description: The policy for the crawler's update and deletion behavior.
    returned: when state is present
    type: complex
    contains:
        DeleteBehavior:
            description: The deletion behavior when the crawler finds a deleted object.
            returned: when state is present
            type: str
            sample: DELETE_FROM_DATABASE
        UpdateBehavior:
            description: The update behavior when the crawler finds a changed schema.
            returned: when state is present
            type: str
            sample: UPDATE_IN_DATABASE
table_prefix:
    description: The table prefix used for catalog tables that are created.
    returned: when state is present
    type: str
    sample: my_prefix
targets:
    description: A list of targets to crawl.
    returned: when state is present
    type: complex
    contains:
        S3Targets:
            description: List of S3 targets.
            returned: when state is present
            type: list
        JdbcTargets:
            description: List of JDBC targets.
            returned: when state is present
            type: list
        MongoDBTargets:
            description: List of Mongo DB targets.
            returned: when state is present
            type: list
        DynamoDBTargets:
            description: List of DynamoDB targets.
            returned: when state is present
            type: list
        CatalogTargets:
            description: List of catalog targets.
            returned: when state is present
            type: list
'''

try:
    import botocore
except ImportError:
    pass  # Handled by AnsibleAWSModule

from ansible.module_utils.common.dict_transformations import camel_dict_to_snake_dict
from ansible.module_utils.common.dict_transformations import snake_dict_to_camel_dict

from ansible_collections.amazon.aws.plugins.module_utils.core import AnsibleAWSModule
from ansible_collections.amazon.aws.plugins.module_utils.core import is_boto3_error_code
from ansible_collections.amazon.aws.plugins.module_utils.ec2 import AWSRetry
from ansible_collections.amazon.aws.plugins.module_utils.ec2 import compare_aws_tags
from ansible_collections.amazon.aws.plugins.module_utils.iam import get_aws_account_info


def _get_glue_crawler(connection, module, glue_crawler_name):
    '''
    Get an AWS Glue crawler based on name. If not found, return None.
    '''
    try:
        return connection.get_crawler(aws_retry=True, Name=glue_crawler_name)['Crawler']
    except is_boto3_error_code('EntityNotFoundException'):
        return None
    except (botocore.exceptions.ClientError, botocore.exceptions.BotoCoreError) as e:  # pylint: disable=duplicate-except
        module.fail_json_aws(e)


def _trim_targets(targets):
    return [_trim_target(t) for t in targets]


def _trim_target(target):
    """
    Some target types have optional parameters which AWS will fill in and return
    To compare the desired targets and the current targets we need to ignore the defaults
    """
    if not target:
        return None
    retval = target.copy()
    if not retval.get('Exclusions', None):
        retval.pop('Exclusions', None)
    return retval


def _compare_glue_crawler_params(user_params, current_params):
    '''
    Compare Glue crawler params. If there is a difference, return True immediately else return False
    '''
    if 'DatabaseName' in user_params and user_params['DatabaseName'] != current_params['DatabaseName']:
        return True
    if 'Description' in user_params and user_params['Description'] != current_params['Description']:
        return True
    if 'RecrawlPolicy' in user_params and user_params['RecrawlPolicy'] != current_params['RecrawlPolicy']:
        return True
    if 'Role' in user_params and user_params['Role'] != current_params['Role']:
        return True
    if 'SchemaChangePolicy' in user_params and user_params['SchemaChangePolicy'] != current_params['SchemaChangePolicy']:
        return True
    if 'TablePrefix' in user_params and user_params['TablePrefix'] != current_params['TablePrefix']:
        return True
    if 'Targets' in user_params:
        # S3 and JDBC targets carry optional fields that AWS fills in with defaults,
        # so both sides are trimmed before comparison to avoid false diffs.
        if 'S3Targets' in user_params['Targets']:
            if _trim_targets(user_params['Targets']['S3Targets']) != _trim_targets(current_params['Targets']['S3Targets']):
                return True
        if 'JdbcTargets' in user_params['Targets']:
            if _trim_targets(user_params['Targets']['JdbcTargets']) != _trim_targets(current_params['Targets']['JdbcTargets']):
                return True
        if 'MongoDBTargets' in user_params['Targets'] and user_params['Targets']['MongoDBTargets'] != current_params['Targets']['MongoDBTargets']:
            return True
        if 'DynamoDBTargets' in user_params['Targets'] and user_params['Targets']['DynamoDBTargets'] != current_params['Targets']['DynamoDBTargets']:
            return True
        if 'CatalogTargets' in user_params['Targets'] and user_params['Targets']['CatalogTargets'] != current_params['Targets']['CatalogTargets']:
            return True

    return False


def ensure_tags(connection, module, glue_crawler):
    '''
    Reconcile the crawler's tags with the module's C(tags) parameter.
    Returns True if any tag was added or removed.
    '''
    changed = False

    if module.params.get('tags') is None:
        return False

    account_id, partition = get_aws_account_info(module)
    arn = 'arn:{0}:glue:{1}:{2}:crawler/{3}'.format(partition, module.region, account_id, module.params.get('name'))

    try:
        existing_tags = connection.get_tags(aws_retry=True, ResourceArn=arn).get('Tags', {})
    except (botocore.exceptions.ClientError, botocore.exceptions.BotoCoreError) as e:
        if module.check_mode:
            # The crawler may not exist yet in check mode; treat as untagged.
            existing_tags = {}
        else:
            module.fail_json_aws(e, msg='Unable to get tags for Glue crawler %s' % module.params.get('name'))

    tags_to_add, tags_to_remove = compare_aws_tags(existing_tags, module.params.get('tags'), module.params.get('purge_tags'))

    if tags_to_remove:
        changed = True
        if not module.check_mode:
            try:
                connection.untag_resource(aws_retry=True, ResourceArn=arn, TagsToRemove=tags_to_remove)
            except (botocore.exceptions.ClientError, botocore.exceptions.BotoCoreError) as e:
                module.fail_json_aws(e, msg='Unable to set tags for Glue crawler %s' % module.params.get('name'))

    if tags_to_add:
        changed = True
        if not module.check_mode:
            try:
                connection.tag_resource(aws_retry=True, ResourceArn=arn, TagsToAdd=tags_to_add)
            except (botocore.exceptions.ClientError, botocore.exceptions.BotoCoreError) as e:
                module.fail_json_aws(e, msg='Unable to set tags for Glue crawler %s' % module.params.get('name'))

    return changed


def create_or_update_glue_crawler(connection, module, glue_crawler):
    '''
    Create or update an AWS Glue crawler
    '''
    changed = False

    params = dict()
    params['Name'] = module.params.get('name')
    # role/targets are enforced by required_if for state=present, so they are always set here.
    params['Role'] = module.params.get('role')
    params['Targets'] = module.params.get('targets')
    if module.params.get('database_name') is not None:
        params['DatabaseName'] = module.params.get('database_name')
    if module.params.get('description') is not None:
        params['Description'] = module.params.get('description')
    if module.params.get('recrawl_policy') is not None:
        params['RecrawlPolicy'] = snake_dict_to_camel_dict(module.params.get('recrawl_policy'), capitalize_first=True)
    if module.params.get('schema_change_policy') is not None:
        params['SchemaChangePolicy'] = snake_dict_to_camel_dict(module.params.get('schema_change_policy'), capitalize_first=True)
    if module.params.get('table_prefix') is not None:
        params['TablePrefix'] = module.params.get('table_prefix')

    if glue_crawler:
        if _compare_glue_crawler_params(params, glue_crawler):
            try:
                if not module.check_mode:
                    connection.update_crawler(aws_retry=True, **params)
                changed = True
            except (botocore.exceptions.ClientError, botocore.exceptions.BotoCoreError) as e:
                module.fail_json_aws(e)
    else:
        try:
            if not module.check_mode:
                connection.create_crawler(aws_retry=True, **params)
            changed = True
        except (botocore.exceptions.ClientError, botocore.exceptions.BotoCoreError) as e:
            module.fail_json_aws(e)

    glue_crawler = _get_glue_crawler(connection, module, params['Name'])

    changed |= ensure_tags(connection, module, glue_crawler)

    module.exit_json(changed=changed, **camel_dict_to_snake_dict(glue_crawler or {}, ignore_list=['SchemaChangePolicy', 'RecrawlPolicy', 'Targets']))


def delete_glue_crawler(connection, module, glue_crawler):
    '''
    Delete an AWS Glue crawler
    '''
    changed = False

    if glue_crawler:
        try:
            if not module.check_mode:
                connection.delete_crawler(aws_retry=True, Name=glue_crawler['Name'])
            changed = True
        except (botocore.exceptions.ClientError, botocore.exceptions.BotoCoreError) as e:
            module.fail_json_aws(e)

    module.exit_json(changed=changed)


def main():

    argument_spec = (
        dict(
            database_name=dict(type='str'),
            description=dict(type='str'),
            name=dict(required=True, type='str'),
            purge_tags=dict(type='bool', default=True),
            recrawl_policy=dict(type='dict', options=dict(
                recrawl_behavior=dict(type='str')
            )),
            role=dict(type='str'),
            schema_change_policy=dict(type='dict', options=dict(
                delete_behavior=dict(type='str'),
                update_behavior=dict(type='str')
            )),
            state=dict(required=True, choices=['present', 'absent'], type='str'),
            table_prefix=dict(type='str'),
            tags=dict(type='dict', aliases=['resource_tags']),
            targets=dict(type='dict')
        )
    )

    module = AnsibleAWSModule(argument_spec=argument_spec,
                              required_if=[
                                  ('state', 'present', ['role', 'targets'])
                              ],
                              supports_check_mode=True
                              )

    connection = module.client('glue', retry_decorator=AWSRetry.jittered_backoff(retries=10))

    state = module.params.get('state')

    glue_crawler = _get_glue_crawler(connection, module, module.params.get('name'))

    if state == 'present':
        create_or_update_glue_crawler(connection, module, glue_crawler)
    else:
        delete_glue_crawler(connection, module, glue_crawler)


if __name__ == '__main__':
    main()