Dre4m Shell
Server IP : 85.214.239.14  /  Your IP : 3.138.67.56
Web Server : Apache/2.4.62 (Debian)
System : Linux h2886529.stratoserver.net 4.9.0 #1 SMP Tue Jan 9 19:45:01 MSK 2024 x86_64
User : www-data ( 33)
PHP Version : 7.4.18
Disable Function : pcntl_alarm,pcntl_fork,pcntl_waitpid,pcntl_wait,pcntl_wifexited,pcntl_wifstopped,pcntl_wifsignaled,pcntl_wifcontinued,pcntl_wexitstatus,pcntl_wtermsig,pcntl_wstopsig,pcntl_signal,pcntl_signal_get_handler,pcntl_signal_dispatch,pcntl_get_last_error,pcntl_strerror,pcntl_sigprocmask,pcntl_sigwaitinfo,pcntl_sigtimedwait,pcntl_exec,pcntl_getpriority,pcntl_setpriority,pcntl_async_signals,pcntl_unshare,
MySQL : OFF  |  cURL : OFF  |  WGET : ON  |  Perl : ON  |  Python : ON  |  Sudo : ON  |  Pkexec : OFF
Directory :  /lib/python3/dist-packages/ansible_collections/community/aws/plugins/modules/

Upload File :
current_dir [ Writeable ] document_root [ Writeable ]

 

Command :


[ HOME SHELL ]     

Current File : /lib/python3/dist-packages/ansible_collections/community/aws/plugins/modules/glue_crawler.py
#!/usr/bin/python
# Copyright: (c) 2018, Rob White (@wimnat)
# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt)

from __future__ import (absolute_import, division, print_function)
__metaclass__ = type


# Ansible module documentation block (YAML). Fix: "changed schema.." had a
# stray double period.
DOCUMENTATION = r'''
---
module: glue_crawler
version_added: 4.1.0
short_description: Manage an AWS Glue crawler
description:
  - Manage an AWS Glue crawler. See U(https://aws.amazon.com/glue/) for details.
  - Prior to release 5.0.0 this module was called C(community.aws.aws_glue_crawler).
    The usage did not change.
author:
  - 'Ivan Chekaldin (@ichekaldin)'
options:
  database_name:
    description:
      - The name of the database where results are written.
    type: str
  description:
    description:
      - Description of the crawler being defined.
    type: str
  name:
    description:
      - The name you assign to this crawler definition. It must be unique in your account.
    required: true
    type: str
  recrawl_policy:
    description:
      - A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run.
    suboptions:
      recrawl_behavior:
        description:
          - Specifies whether to crawl the entire dataset again or to crawl only folders that were added since the last crawler run.
          - Supported options are C(CRAWL_EVERYTHING) and C(CRAWL_NEW_FOLDERS_ONLY).
        type: str
    type: dict
  role:
    description:
      - The name or ARN of the IAM role associated with this crawler.
      - Required when I(state=present).
    type: str
  schema_change_policy:
    description:
      - The policy for the crawler's update and deletion behavior.
    suboptions:
      delete_behavior:
        description:
          - Defines the deletion behavior when the crawler finds a deleted object.
          - Supported options are C(LOG), C(DELETE_FROM_DATABASE), and C(DEPRECATE_IN_DATABASE).
        type: str
      update_behavior:
        description:
          - Defines the update behavior when the crawler finds a changed schema.
          - Supported options are C(LOG) and C(UPDATE_IN_DATABASE).
        type: str
    type: dict
  state:
    description:
      - Create or delete the AWS Glue crawler.
    required: true
    choices: [ 'present', 'absent' ]
    type: str
  table_prefix:
    description:
      - The table prefix used for catalog tables that are created.
    type: str
  targets:
    description:
      - A list of targets to crawl. See example below.
      - Required when I(state=present).
    type: dict
extends_documentation_fragment:
  - amazon.aws.aws
  - amazon.aws.ec2
  - amazon.aws.boto3
  - amazon.aws.tags
'''

# Ansible module example playbooks. Fix: the recrawl_policy suboption was
# misspelled C(recrawl_ehavior) — copying the example verbatim would fail
# argument validation.
EXAMPLES = r'''
# Note: These examples do not set authentication details, see the AWS Guide for details.

# Create an AWS Glue crawler
- community.aws.glue_crawler:
    name: my-glue-crawler
    database_name: my_database
    role: my-iam-role
    schema_change_policy:
      delete_behavior: DELETE_FROM_DATABASE
      update_behavior: UPDATE_IN_DATABASE
    recrawl_policy:
      recrawl_behavior: CRAWL_EVERYTHING
    targets:
      S3Targets:
        - Path: "s3://my-bucket/prefix/folder/"
          ConnectionName: my-connection
          Exclusions:
            - "**.json"
            - "**.yml"
    state: present

# Delete an AWS Glue crawler
- community.aws.glue_crawler:
    name: my-glue-crawler
    state: absent
'''

# Ansible module return-value documentation. Fix: the sample for
# database_name was C(my_table), contradicting its own description (it is a
# database name) and the C(my_database) value used in EXAMPLES.
RETURN = r'''
creation_time:
    description: The time and date that this crawler definition was created.
    returned: when state is present
    type: str
    sample: '2021-04-01T05:19:58.326000+00:00'
database_name:
    description: The name of the database where results are written.
    returned: when state is present
    type: str
    sample: my_database
description:
    description: Description of the crawler.
    returned: when state is present
    type: str
    sample: My crawler
last_updated:
    description: The time and date that this crawler definition was last updated.
    returned: when state is present
    type: str
    sample: '2021-04-01T05:19:58.326000+00:00'
name:
    description: The name of the AWS Glue crawler.
    returned: always
    type: str
    sample: my-glue-crawler
recrawl_policy:
    description: A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run.
    returned: when state is present
    type: complex
    contains:
        RecrawlBehavior:
            description: Whether to crawl the entire dataset again or to crawl only folders that were added since the last crawler run.
            returned: when state is present
            type: str
            sample: CRAWL_EVERYTHING
role:
    description: The name or ARN of the IAM role associated with this crawler.
    returned: when state is present
    type: str
    sample: my-iam-role
schema_change_policy:
    description: The policy for the crawler's update and deletion behavior.
    returned: when state is present
    type: complex
    contains:
        DeleteBehavior:
            description: The deletion behavior when the crawler finds a deleted object.
            returned: when state is present
            type: str
            sample: DELETE_FROM_DATABASE
        UpdateBehavior:
            description: The update behavior when the crawler finds a changed schema.
            returned: when state is present
            type: str
            sample: UPDATE_IN_DATABASE

table_prefix:
    description: The table prefix used for catalog tables that are created.
    returned: when state is present
    type: str
    sample: my_prefix
targets:
    description: A list of targets to crawl.
    returned: when state is present
    type: complex
    contains:
        S3Targets:
            description: List of S3 targets.
            returned: when state is present
            type: list
        JdbcTargets:
            description: List of JDBC targets.
            returned: when state is present
            type: list
        MongoDBTargets:
            description: List of Mongo DB targets.
            returned: when state is present
            type: list
        DynamoDBTargets:
            description: List of DynamoDB targets.
            returned: when state is present
            type: list
        CatalogTargets:
            description: List of catalog targets.
            returned: when state is present
            type: list
'''

try:
    import botocore
except ImportError:
    pass  # Handled by AnsibleAWSModule

from ansible.module_utils.common.dict_transformations import camel_dict_to_snake_dict
from ansible.module_utils.common.dict_transformations import snake_dict_to_camel_dict

from ansible_collections.amazon.aws.plugins.module_utils.core import AnsibleAWSModule
from ansible_collections.amazon.aws.plugins.module_utils.core import is_boto3_error_code
from ansible_collections.amazon.aws.plugins.module_utils.ec2 import AWSRetry
from ansible_collections.amazon.aws.plugins.module_utils.ec2 import compare_aws_tags
from ansible_collections.amazon.aws.plugins.module_utils.iam import get_aws_account_info


def _get_glue_crawler(connection, module, glue_crawler_name):
    '''
    Get an AWS Glue crawler based on name. If not found, return None.
    '''
    try:
        return connection.get_crawler(aws_retry=True, Name=glue_crawler_name)['Crawler']
    except is_boto3_error_code('EntityNotFoundException'):
        return None
    except (botocore.exceptions.ClientError, botocore.exceptions.BotoCoreError) as e:  # pylint: disable=duplicate-except
        module.fail_json_aws(e)


def _trim_targets(targets):
    return [_trim_target(t) for t in targets]


def _trim_target(target):
    """
    Some target types have optional parameters which AWS will fill in and return
    To compare the desired targets and the current targets we need to ignore the defaults
    """
    if not target:
        return None
    retval = target.copy()
    if not retval.get('Exclusions', None):
        retval.pop('Exclusions', None)
    return retval


def _compare_glue_crawler_params(user_params, current_params):
    '''
    Compare Glue crawler params. If there is a difference, return True immediately else return False
    '''
    if 'DatabaseName' in user_params and user_params['DatabaseName'] != current_params['DatabaseName']:
        return True
    if 'Description' in user_params and user_params['Description'] != current_params['Description']:
        return True
    if 'RecrawlPolicy' in user_params and user_params['RecrawlPolicy'] != current_params['RecrawlPolicy']:
        return True
    if 'Role' in user_params and user_params['Role'] != current_params['Role']:
        return True
    if 'SchemaChangePolicy' in user_params and user_params['SchemaChangePolicy'] != current_params['SchemaChangePolicy']:
        return True
    if 'TablePrefix' in user_params and user_params['TablePrefix'] != current_params['TablePrefix']:
        return True
    if 'Targets' in user_params:
        if 'S3Targets' in user_params['Targets']:
            if _trim_targets(user_params['Targets']['S3Targets']) != _trim_targets(current_params['Targets']['S3Targets']):
                return True
        if 'JdbcTargets' in user_params['Targets'] and user_params['Targets']['JdbcTargets'] != current_params['Targets']['JdbcTargets']:
            if _trim_targets(user_params['Targets']['JdbcTargets']) != _trim_targets(current_params['Targets']['JdbcTargets']):
                return True
        if 'MongoDBTargets' in user_params['Targets'] and user_params['Targets']['MongoDBTargets'] != current_params['Targets']['MongoDBTargets']:
            return True
        if 'DynamoDBTargets' in user_params['Targets'] and user_params['Targets']['DynamoDBTargets'] != current_params['Targets']['DynamoDBTargets']:
            return True
        if 'CatalogTargets' in user_params['Targets'] and user_params['Targets']['CatalogTargets'] != current_params['Targets']['CatalogTargets']:
            return True

    return False


def ensure_tags(connection, module, glue_crawler):
    '''
    Reconcile the tags on the Glue crawler with the requested tags.

    Returns True when any tag was added or removed (or would have been in
    check mode); returns False immediately when no tags were requested.
    '''
    desired_tags = module.params.get('tags')
    if desired_tags is None:
        return False

    crawler_name = module.params.get('name')
    account_id, partition = get_aws_account_info(module)
    arn = 'arn:{0}:glue:{1}:{2}:crawler/{3}'.format(partition, module.region, account_id, crawler_name)

    try:
        existing_tags = connection.get_tags(aws_retry=True, ResourceArn=arn).get('Tags', {})
    except (botocore.exceptions.ClientError, botocore.exceptions.BotoCoreError) as e:
        if not module.check_mode:
            module.fail_json_aws(e, msg='Unable to get tags for Glue crawler %s' % crawler_name)
        # In check mode the crawler may not exist yet; treat it as untagged.
        existing_tags = {}

    tags_to_add, tags_to_remove = compare_aws_tags(existing_tags, desired_tags, module.params.get('purge_tags'))
    changed = bool(tags_to_add or tags_to_remove)

    if tags_to_remove and not module.check_mode:
        try:
            connection.untag_resource(aws_retry=True, ResourceArn=arn, TagsToRemove=tags_to_remove)
        except (botocore.exceptions.ClientError, botocore.exceptions.BotoCoreError) as e:
            module.fail_json_aws(e, msg='Unable to set tags for Glue crawler %s' % crawler_name)

    if tags_to_add and not module.check_mode:
        try:
            connection.tag_resource(aws_retry=True, ResourceArn=arn, TagsToAdd=tags_to_add)
        except (botocore.exceptions.ClientError, botocore.exceptions.BotoCoreError) as e:
            module.fail_json_aws(e, msg='Unable to set tags for Glue crawler %s' % crawler_name)

    return changed


def create_or_update_glue_crawler(connection, module, glue_crawler):
    '''
    Create or update an AWS Glue crawler and exit the module.

    :param connection: boto3 Glue client (wrapped with the retry decorator)
    :param module: AnsibleAWSModule instance
    :param glue_crawler: existing crawler description dict, or None
    Exits the module with changed status and the snake_cased crawler facts.
    '''
    changed = False

    # Name/Role/Targets are always sent; role and targets are enforced by
    # required_if when state=present. (The original code redundantly
    # re-assigned Role and Targets under "is not None" guards — dead code.)
    params = {
        'Name': module.params.get('name'),
        'Role': module.params.get('role'),
        'Targets': module.params.get('targets'),
    }
    # Optional parameters are only sent when the user supplied them.
    if module.params.get('database_name') is not None:
        params['DatabaseName'] = module.params.get('database_name')
    if module.params.get('description') is not None:
        params['Description'] = module.params.get('description')
    if module.params.get('recrawl_policy') is not None:
        params['RecrawlPolicy'] = snake_dict_to_camel_dict(module.params.get('recrawl_policy'), capitalize_first=True)
    if module.params.get('schema_change_policy') is not None:
        params['SchemaChangePolicy'] = snake_dict_to_camel_dict(module.params.get('schema_change_policy'), capitalize_first=True)
    if module.params.get('table_prefix') is not None:
        params['TablePrefix'] = module.params.get('table_prefix')

    if glue_crawler:
        # Only push an update when something actually differs.
        if _compare_glue_crawler_params(params, glue_crawler):
            try:
                if not module.check_mode:
                    connection.update_crawler(aws_retry=True, **params)
                changed = True
            except (botocore.exceptions.ClientError, botocore.exceptions.BotoCoreError) as e:
                module.fail_json_aws(e)
    else:
        try:
            if not module.check_mode:
                connection.create_crawler(aws_retry=True, **params)
            changed = True
        except (botocore.exceptions.ClientError, botocore.exceptions.BotoCoreError) as e:
            module.fail_json_aws(e)

    # Re-read so the returned facts reflect the post-change state.
    glue_crawler = _get_glue_crawler(connection, module, params['Name'])

    changed |= ensure_tags(connection, module, glue_crawler)

    module.exit_json(changed=changed, **camel_dict_to_snake_dict(glue_crawler or {}, ignore_list=['SchemaChangePolicy', 'RecrawlPolicy', 'Targets']))


def delete_glue_crawler(connection, module, glue_crawler):
    '''
    Delete the given AWS Glue crawler, if one exists.

    Exits the module reporting whether a deletion happened (or would have
    happened in check mode).
    '''
    changed = False

    if glue_crawler:
        changed = True
        if not module.check_mode:
            try:
                connection.delete_crawler(aws_retry=True, Name=glue_crawler['Name'])
            except (botocore.exceptions.ClientError, botocore.exceptions.BotoCoreError) as e:
                module.fail_json_aws(e)

    module.exit_json(changed=changed)


def main():
    '''Module entry point: declare arguments, build the client, dispatch on state.'''
    argument_spec = dict(
        database_name=dict(type='str'),
        description=dict(type='str'),
        name=dict(required=True, type='str'),
        purge_tags=dict(type='bool', default=True),
        recrawl_policy=dict(
            type='dict',
            options=dict(recrawl_behavior=dict(type='str')),
        ),
        role=dict(type='str'),
        schema_change_policy=dict(
            type='dict',
            options=dict(
                delete_behavior=dict(type='str'),
                update_behavior=dict(type='str'),
            ),
        ),
        state=dict(required=True, choices=['present', 'absent'], type='str'),
        table_prefix=dict(type='str'),
        tags=dict(type='dict', aliases=['resource_tags']),
        targets=dict(type='dict'),
    )

    # role and targets are only mandatory when creating the crawler.
    module = AnsibleAWSModule(
        argument_spec=argument_spec,
        required_if=[('state', 'present', ['role', 'targets'])],
        supports_check_mode=True,
    )

    connection = module.client('glue', retry_decorator=AWSRetry.jittered_backoff(retries=10))

    glue_crawler = _get_glue_crawler(connection, module, module.params.get('name'))

    if module.params.get('state') == 'present':
        create_or_update_glue_crawler(connection, module, glue_crawler)
    else:
        delete_glue_crawler(connection, module, glue_crawler)


if __name__ == '__main__':
    main()

Anon7 - 2022
AnonSec Team