Configure the CLI
aws configure list
aws configure list-profiles
~/.aws/credentials (Linux & Mac) or %USERPROFILE%\.aws\credentials (Windows)
https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-profiles.html?icmpid=docs_sso_user_portal
A named profile is a collection of settings and credentials that you can apply to an AWS CLI command. When you specify a profile for a command, its settings and credentials are used to run that command. Multiple named profiles can be stored in the config and credentials files.
You can specify one default profile that is used when no profile is explicitly referenced. Other profiles have names that you can pass as a parameter on the command line for individual commands. Alternatively, you can specify a profile in the AWS_PROFILE environment variable, which overrides the default profile for commands that run in that session.
~/.aws/credentials (Linux & Mac) or %USERPROFILE%\.aws\credentials (Windows)
[default]
aws_access_key_id=AKIAIOSFODNN7EXAMPLE
aws_secret_access_key=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
[user1]
aws_access_key_id=AKIAI44QH8DHBEXAMPLE
aws_secret_access_key=je7MtGbClwBF/2Zp9Utk/h3yCo8nvbEXAMPLEKEY
~/.aws/config (Linux & Mac) or %USERPROFILE%\.aws\config (Windows)
[default]
region=us-west-2
output=json
[profile user1]
region=us-east-1
output=text
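To use a named profile from Python, boto3 can load it through a session. A minimal sketch, assuming the [user1] profile above exists locally (the bucket listing is only there to prove the credentials work):
import boto3

# Loads credentials and region for the "user1" profile from
# ~/.aws/credentials and ~/.aws/config
session = boto3.Session(profile_name="user1")
s3 = session.client("s3")

# Any call made through this client uses the profile's credentials
print([b["Name"] for b in s3.list_buckets()["Buckets"]])
On the CLI, the equivalent is either --profile user1 on each command or export AWS_PROFILE=user1 for the session.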
aws logs describe-log-groups
Delete log group:
https://docs.aws.amazon.com/cli/latest/reference/logs/delete-log-group.html
aws logs delete-log-group --log-group-name <my-logs>
# e.g.
aws logs delete-log-group --log-group-name /aws/lambda/stepTwo
https://stackoverflow.com/questions/42134873/delete-all-log-streams-of-a-log-group-using-aws-cli
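A rough boto3 sketch of the same idea, deleting every log stream in a group while keeping the group itself (the log group name is a placeholder; assumes permissions for DescribeLogStreams and DeleteLogStream):
import boto3

logs = boto3.client("logs")
log_group = "/aws/lambda/your-lambda-name"  # placeholder

# Page through all streams in the group and delete them one by one
paginator = logs.get_paginator("describe_log_streams")
for page in paginator.paginate(logGroupName=log_group):
    for stream in page["logStreams"]:
        logs.delete_log_stream(
            logGroupName=log_group,
            logStreamName=stream["logStreamName"],
        )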
aws logs tail "/aws/lambda/your-lambda-name"
# or
aws logs tail "/aws/lambda/your-lambda-name" --follow
# Display the logs from the past 30 minutes
aws logs tail "/aws/lambda/your-lambda-name" --follow --since 30m
# Display the logs from the past 10 seconds
aws logs tail "/aws/lambda/your-lambda-name" --follow --since 10s
# Display the logs from the past 3 hours
aws logs tail "/aws/lambda/your-lambda-name" --follow --since 3h
# Display the logs from the past 2 days
aws logs tail "/aws/lambda/your-lambda-name" --follow --since 2d
# Display the logs from the past 3 weeks
aws logs tail "/aws/lambda/your-lambda-name" --follow --since 3w
aws logs tail "/aws/lambda/your-lambda-name" --follow --format short --filter-pattern "Hello"
aws logs tail "/aws/lambda/your-lambda-name" --follow --format short --filter-pattern "?Hello ?END"
https://aws.amazon.com/premiumsupport/knowledge-center/s3-multipart-upload-cli/
Delete a single file in S3 at a given path.
aws s3 rm s3://test-bucket/your_file.pdf
Delete all files in a bucket.
bucket=s3://your-bucket-name
aws s3 rm $bucket --recursive
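If you'd rather do it from Python, a minimal boto3 sketch (the bucket name is a placeholder; this deletes current objects, not old versions):
import boto3

s3 = boto3.resource("s3")
bucket = s3.Bucket("your-bucket-name")  # placeholder

# Batches DeleteObjects calls under the hood
bucket.objects.all().delete()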
create bucket
# create new bucket
aws s3api create-bucket --bucket mys3bucket-testupload1 --region us-east-1
upload items
aws s3 cp C:\S3Files\Script1.txt s3://mys3bucket-testupload1/
aws s3 cp <source> <target> --options
aws s3 cp large_test_file s3://DOC-EXAMPLE-BUCKET/
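From Python, boto3's upload_file switches to multipart automatically once the file crosses the threshold; a sketch (file name, bucket, and thresholds are placeholders):
import boto3
from boto3.s3.transfer import TransferConfig

s3 = boto3.client("s3")

# Files larger than multipart_threshold are uploaded in multipart_chunksize parts
config = TransferConfig(
    multipart_threshold=64 * 1024 * 1024,  # 64 MB
    multipart_chunksize=16 * 1024 * 1024,  # 16 MB
)
s3.upload_file("large_test_file", "DOC-EXAMPLE-BUCKET", "large_test_file", Config=config)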
list items
# list items in bucket
aws s3 ls <target bucket>
run_test() {
local bucket=s3://your-bucket-name
local file_name=your_pdf_file_name.pdf
local local_file=local_dir/$file_name
# list objects in bucket
aws s3 ls $bucket
aws s3 rm $bucket/$file_name
# or
aws s3 rm $bucket --recursive
# copy local file to bucket
aws s3 cp $local_file $bucket
# list objects in bucket
aws s3 ls $bucket
}
Login to ECR
region=ca-central-1
user_id=3044958i3939
aws ecr get-login-password --region $region | docker login --username AWS --password-stdin $user_id.dkr.ecr.$region.amazonaws.com
Create new ECR repo
repo_name=my-repo
region=ca-central-1
aws ecr create-repository --repository-name $repo_name --region $region
Push Image to Repo
region=ca-central-1
user_id=3044958i3939
image_name=img_name
image_tag=latest
ecr_build_push() {
aws ecr get-login-password --region $region | docker login --username AWS --password-stdin $user_id.dkr.ecr.$region.amazonaws.com
docker build -t $image_name:$image_tag . --network=host
docker push $user_id.dkr.ecr.$region.amazonaws.com/$image_name:$image_tag
}
ecs_redeploy() {
# force re-create of ecs cluster after pushing new image to ecr
aws ecs update-service --cluster my-Cluster --service my-Service --force-new-deployment
}
Convert docker-compose.yaml to CloudFormation
convert_docker_compose() {
# docker context create ener
docker context use ener
aws ecr get-login-password --region ca-central-1 | docker login --username AWS --password-stdin 12121212819.dkr.ecr.ca-central-1.amazonaws.com
docker compose convert > cloudformation.yaml
}
List Lambda Functions: https://docs.aws.amazon.com/cli/latest/reference/lambda/list-functions.html
aws lambda list-functions
# or
aws lambda list-functions | grep FunctionName
Delete Function: https://docs.aws.amazon.com/cli/latest/reference/lambda/delete-function.html
aws lambda delete-function --function-name step-func-example-StartDocumentAnalysis
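The same thing scripted with boto3, listing all function names and then deleting one by name (the function name reuses the example above; adjust as needed):
import boto3

lam = boto3.client("lambda")

# Print every function name in the current account/region
paginator = lam.get_paginator("list_functions")
for page in paginator.paginate():
    for fn in page["Functions"]:
        print(fn["FunctionName"])

# Delete a single function by name
lam.delete_function(FunctionName="step-func-example-StartDocumentAnalysis")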
Events
#!/bin/bash
run_redis() {
docker run -p 6379:6379 --name redis-container -d redis:latest
docker exec -it redis-container redis-cli ping
}
create_aws_role_data_lake() {
printf '
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Principal": {
"AWS": "arn:aws:iam::511644777240:root"
},
"Action": "sts:AssumeRole",
"Condition": {
"StringEquals": {
"sts:ExternalId": "a53aaa2d2-ab91-43-a19c-23094bf4ebb1"
}
}
}
]
}
' > role-trust-policy.json
aws iam create-role \
--role-name atlas-data-lake-role \
--assume-role-policy-document file://role-trust-policy.json
}
# create_aws_role_data_lake
create_aws_role_data_lake_policy() {
printf '{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": [
"s3:ListBucket",
"s3:GetObject",
"s3:GetObjectVersion",
"s3:GetBucketLocation"
],
"Resource": [
"arn:aws:s3:::your-s3-bucket",
"arn:aws:s3:::your-s3-bucket/*"
]
},
{
"Effect": "Allow",
"Action": [
"s3:PutObject",
"s3:DeleteObject"
],
"Resource": [
"arn:aws:s3:::your-s3-bucket",
"arn:aws:s3:::your-s3-bucket/*"
]
}
]
}' > adl-s3-policy.json
aws iam put-role-policy \
--role-name atlas-data-lake-role \
--policy-name atlas-data-lake-role-policy \
--policy-document file://adl-s3-policy.json
}
# create_aws_role_data_lake_policy
query_gql() {
url=https://realm.mongodb.com/api/client/v2.0/app/app-main-mgjbd/graphql
}
create_s3_comprehend_access_role() {
# https://docs.aws.amazon.com/comprehend/latest/dg/tutorial-reviews-create-role.html
printf \
'{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Principal": {
"Service": "comprehend.amazonaws.com"
},
"Action": "sts:AssumeRole"
}
]
}' > comprehend-trust-policy.json
aws iam create-role --role-name AmazonComprehendServiceRole-access-role \
--assume-role-policy-document file://./comprehend-trust-policy.json
# copy arn down
}
# create_s3_comprehend_access_role
role-trust-policy.json
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Principal": {
"AWS": "arn:aws:iam::432723721300:root"
},
"Action": "sts:AssumeRole",
"Condition": {
"StringEquals": {
"sts:ExternalId": "a53b15d2a19c-13094bf4ebb1"
}
}
}
]
}
adl-s3-policy.json
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": [
"s3:ListBucket",
"s3:GetObject",
"s3:GetObjectVersion",
"s3:GetBucketLocation"
],
"Resource": [
"arn:aws:s3:::my-s3-bucket-name",
"arn:aws:s3:::my-s3-bucket-name/*"
]
},
{
"Effect": "Allow",
"Action": [
"s3:PutObject",
"s3:DeleteObject"
],
"Resource": [
"arn:aws:s3:::my-s3-bucket-name",
"arn:aws:s3:::my-s3-bucket-name/*"
]
}
]
}
comprehend-trust-policy.json
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Principal": {
"Service": "comprehend.amazonaws.com"
},
"Action": "sts:AssumeRole"
}
]
}
Amplify Tutorial:
https://docs.amplify.aws/cli/start/install/#option-2-follow-the-instructions
Figma Tutorial:
https://www.figma.com/file/83moMt1RpTi30AauBlXBno/AWS-Amplify-UI-Kit-(Community)?node-id=861%3A3635
https://docs.amplify.aws/cli/start/install/
curl -sL https://aws-amplify.github.io/amplify-cli/install | bash && $SHELL
configure
amplify configure
https://docs.aws.amazon.com/AmazonS3/latest/API/s3-api.pdf
https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-userguide.pdf
https://docs.aws.amazon.com/eventbridge/latest/APIReference/eventbridge-api.pdf
https://docs.aws.amazon.com/eventbridge/latest/userguide/user-guide.pdf
https://docs.aws.amazon.com/sns/latest/dg/sns-dg.pdf
https://docs.aws.amazon.com/sns/latest/api/sns-api.pdf
https://hands-on.cloud/using-terraform-to-deploy-s3-sqs-lambda-integration/
S3 event
{
"Records":[
{
"eventVersion":"2.1",
"eventSource":"aws:s3",
"awsRegion":"us-east-1",
"eventTime":"2021-04-26T23:31:08.107Z",
"eventName":"ObjectCreated:Put",
"userIdentity":{
"principalId":"AWS:012850762433:admin"
},
"requestParameters":{
"sourceIPAddress":"108.41.58.86"
},
"responseElements":{
"x-amz-request-id":"YP7DR0F7H7R1GN1S",
"x-amz-id-2":"WYvnoGQrVxe2LfV6yr/sDsZXj/QDL0vD02WQYn9zXg3jX2iKfq83omTmcOcIiuSUk4dTmRRDrhdNNzffoi8AeSBN7RHs2ab0"
},
"s3":{
"s3SchemaVersion":"1.0",
"configurationId":"tf-s3-queue-20210426224851886600000002",
"bucket":{
"name":"amaksimov-s3-sqs-demo-bucket",
"ownerIdentity":{
"principalId":"A1W385KKD8Q319"
},
"arn":"arn:aws:s3:::amaksimov-s3-sqs-demo-bucket"
},
"object":{
"key":"4.+Beginner%27s+Guide+to+AWS+Step+functions+-+HelloWorld+Example.png",
"size":9714,
"eTag":"b21c122beffd36c0f0caabc4dbd8b16d",
"sequencer":"0060874D3FC2FA681D"
}
}
}
]
}
sqs event
{
"Records":[
{
"messageId":"581db230-9853-4be3-a1fe-72c9a5b3e4d4",
"receiptHandle":"+/XzqGde+1gQ957YR8=",
"body":"{\"Records\":[{\"eventVersion\":\"2.1\",\"eventSource\":\"aws:s3\",\"awsRegion\":\"us-east-1\",\"eventTime\":\"2021-04-26T23:25:17.884Z\",\"eventName\":\"ObjectCreated:Put\",\"userIdentity\":{\"principalId\":\"AWS:012850762433:admin\"},\"requestParameters\":{\"sourceIPAddress\":\"108.41.58.86\"},\"responseElements\":{\"x-amz-request-id\":\"74CMGJPKH3HA1G87\",\"x-amz-id-2\":\"c52dEWNgb6rNUs7MNY20ArZHLgtNFiRJIhREfnNAnlLsXHotTUvLS7InfWnkniuawxPgTlkOkTKZICwIgsbfdHDZKQvL0LcV\"},\"s3\":{\"s3SchemaVersion\":\"1.0\",\"configurationId\":\"tf-s3-queue-20210426224851886600000002\",\"bucket\":{\"name\":\"amaksimov-s3-sqs-demo-bucket\",\"ownerIdentity\":{\"principalId\":\"A1W385KKD8Q319\"},\"arn\":\"arn:aws:s3:::amaksimov-s3-sqs-demo-bucket\"},\"object\":{\"key\":\"6.+Beginner%27s+Guide+to+AWS+Step+functions+-+AWS+HelloWorld+example.png\",\"size\":458757,\"eTag\":\"e1148e80d0798b0e23502cbdae1fef58\",\"sequencer\":\"0060874BE06812C89A\"}}}]}",
"attributes":{
"ApproximateReceiveCount":"1",
"SentTimestamp":"1619479521272",
"SenderId":"AIDAJHIPRHEMV73VRJEBU",
"ApproximateFirstReceiveTimestamp":"1619479521279"
},
"messageAttributes":{
},
"md5OfBody":"7195d8d0f011fac4dc115b59d3e86797",
"eventSource":"aws:sqs",
"eventSourceARN":"arn:aws:sqs:us-east-1:012850762433:amaksimov-s3-event-notification-queue",
"awsRegion":"us-east-1"
}
]
}
Event
Can be deployed as:
Catch and Retry:
A Catch field and a Retry field add catch-and-retry logic to a state machine.
Catch ("Type": "Catch") is an array of objects that define a fallback state.
Retry ("Type": "Retry") is an array of objects that define a retry policy if the state encounters runtime errors.
Branching:
A Choice state adds branching logic to a state machine. A Choice state ("Type": "Choice") contains an array of rules that determine which state the state machine transitions to next.
Chaining:
A "Chaining" pattern describes multiple Lambda functions connected together in a state machine.
You can use chaining to create reusable workflow invocations from a Task ("Type": "Task") state of a state machine.
Parallelism:
A Parallel state adds parallelism logic to a state machine.
You can use a Parallel state ("Type": "Parallel") to create parallel branches of invocation in your state machine.
Dynamic parallelism:
A Map state adds dynamic "for-each" loop logic to a state machine. You can use a Map state ("Type": "Map") to run a set of steps for each element of an input array in a state machine.
While the Parallel state invokes multiple branches of steps using the same input, a Map state invokes the same steps for multiple entries of the array.
Basic format of a Lambda function.
import boto3
import copy
import json
import os
from typing import Any
textract_client = boto3.client('textract')
s3_client = boto3.client('s3')
def lambda_handler(event: dict, context: Any):
    print("Processing Event:")
    print(json.dumps(event))
    # .. do stuff
    return event
Resource | Quota |
---|---|
Function memory allocation | 128 MB to 10,240 MB, in 1-MB increments. |
Function timeout | 900 seconds (15 minutes) |
Function environment variables | 4 KB, for all environment variables associated with the function, in aggregate |
Function resource-based policy | 20 KB |
Function layers | Five layers |
Function burst concurrency | 500 - 3000 (varies per Region) |
Invocation payload (request and response) | 6 MB (synchronous) 256 KB (asynchronous) |
Deployment package (.zip file archive) size | 50 MB (zipped, for direct upload) 250 MB (unzipped) 3 MB (console editor) |
Container image code package size | 10 GB |
Test events (console editor) | 10 |
/tmp directory storage | Between 512 MB and 10,240 MB, in 1-MB increments |
File descriptors | 1,024 |
Execution processes/threads | 1,024 |
Base Image:
Image:
ENV LANG=en_US.UTF-8
ENV TZ=:/etc/localtime
ENV PATH=/var/lang/bin:/usr/local/bin:/usr/bin/:/bin:/opt/bin
ENV LD_LIBRARY_PATH=/var/lang/lib:/lib64:/usr/lib64:/var/runtime:/var/runtime/lib:/var/task:/var/task/lib:/opt/lib
ENV LAMBDA_TASK_ROOT=/var/task
ENV LAMBDA_RUNTIME_DIR=/var/runtime
WORKDIR /var/task
ENTRYPOINT ["/lambda-entrypoint.sh"]
If you're trying to use opencv-python you may get the following error:
ImportError: libGL.so.1: cannot open shared object file: No such file or directory
Solution:
yum install -y mesa-libGL
Reference:
https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/template-reference.html
https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/cfn-ug.pdf#template-reference
Reference:
Transform: AWS::Serverless-2016-10-31
Globals:
set of globals
Description:
String
Metadata:
template metadata
Parameters:
set of parameters
Mappings:
set of mappings
Conditions:
set of conditions
Resources:
set of resources
Outputs:
set of outputs
Allow Textract to Publish to SNS:
https://docs.aws.amazon.com/textract/latest/dg/api-async-roles.html
role
{
"Version": "2012-10-17",
"Statement": [
{
"Sid": "",
"Effect": "Allow",
"Principal": {
"Service": "textract.amazonaws.com"
},
"Action": "sts:AssumeRole"
}
]
}
policy
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": [
"sns:Publish"
],
"Resource": "arn:aws:sns:*:*:AmazonTextract*"
}
]
}
https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_policies_elements.html
https://blog.knoldus.com/how-to-setup-iam-roles-in-aws-using-terraform/
https://aws.amazon.com/premiumsupport/knowledge-center/ecs-fargate-access-aws-services/
Overview
Statement:
https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_policies_elements_statement.html
The Statement element can contain a single statement or an array of individual statements.
Sid
https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_policies_elements_sid.html
Action
https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_policies_elements_action.html
The Action element describes the specific action or actions that will be allowed or denied.
Statements must include either an Action or NotAction element.
Each AWS service has its own set of actions that describe tasks that you can perform with that service.
You specify a value using a service namespace as an action prefix (iam, ec2, sqs, sns, s3, etc.).
S3 Actions:
EC2 Actions
IAM Actions:
Others:
Effect:
https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_policies_elements_effect.html
Allow or Deny
Resource:
https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_policies_elements_resource.html
Principal
https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_policies_elements_principal.html
Service Principals:
"Principal": {
"Service": [
"ecs.amazonaws.com",
"elasticloadbalancing.amazonaws.com"
]
}
All Principals
https://docs.aws.amazon.com/step-functions/latest/dg/concepts-states.html
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/stepfunctions.html
https://docs.aws.amazon.com/step-functions/latest/dg/cw-events.html
Tutorials:
Using with other AWS Services:
Input / Output Processing:
Error States and Error Handling:
State Fields
States:
Do some work in your state machine (a Task state)
Make a choice between branches of execution (a Choice state)
Stop an execution with a failure or success (a Fail or Succeed state)
Examples:
// INPUT TO PASS
// {
// "product": {
// "name": "T-Shirt",
// "details": {
// "color": "green",
// "size": "large",
// "material": "nylon"
// },
// "availability": "in stock",
// "cost": "$23"
// }
// }
"MapValues": {
"Type": "Pass",
"Parameters": {
"product.$": "$.product.name",
"colour.$": "$.product.details.color",
"size.$": "$.product.details.size",
"availability.$": "$.product.availability"
},
"Next": "AnotherState"
}
// OUTPUT FROM PASS
// {
// "product": "T-Shirt",
// "colour": "green",
// "size": "large",
// "availability": "in stock"
// }
Provide a delay for a certain amount of time or until a specified time/date (a Wait state)
Example:
"wait_ten_seconds": {
"Type": "Wait",
"Seconds": 10,
"Next": "NextState"
}
or
"wait_until" : {
"Type": "Wait",
"TimestampPath": "$.expirydate",
"Next": "NextState"
}
or
"wait_until" : {
"Type": "Wait",
"Timestamp": "2016-03-14T01:59:00Z",
"Next": "NextState"
}
Begin parallel branches of execution (a Parallel state)
Dynamically iterate steps (a Map state)
After you have created and executed Express Workflows, and if logging is enabled, you can access information about the execution in Amazon CloudWatch Logs.
ErrorEquals (Required)
import boto3
import json
import os
import logging

client = boto3.client('stepfunctions')
LOGGER = logging.getLogger()
LOGGER.setLevel(logging.INFO)

def lambda_handler(event, context):
    # event is already a dict, so log it directly
    LOGGER.info(json.dumps(event))
    body = json.loads(event["Records"][0]["body"])
    message = json.loads(body["Message"])
    document_location = message["DocumentLocation"]
    bucket_name = document_location["S3Bucket"]
    key = document_location["S3ObjectName"]
    # Build an execution-name-friendly job name from the object key
    key_split_on_slash = key.split("/")
    join_with_dash = "-".join(key_split_on_slash)
    join_split_on_colon = join_with_dash.split(":")
    job_name = "_".join(join_split_on_colon)
    job_id = message["JobId"]
    status = message["Status"]
    response = client.start_execution(
        stateMachineArn=os.environ.get('STATE_MACHINE_ARN'),
        input=json.dumps({
            "bucket_name": bucket_name,
            "key": key,
            "job_name": job_name,
            "job_id": job_id,
            "status": status,
        })
    )
    return response["executionArn"]
aws configure set default.region us-east-1
Refs:
Summary:
Annotations
Provides the location of your entities in a number of documents so Comprehend can train on both the entity and its context. p.150
By submitting annotations along with your documents:
CERs created with PDF annotations can be used with plaintext, image files (JPG, PNG, TIFF), PDF files, and Word documents. p.149
A CER created with annotated PDF files supports English documents only.
Can train a model on up to 25 custom entities at once. p.149
Can be used for real-time entity detection and for batch entity detection jobs. p.149
Entity Lists
Annotate your data with care and verify that you annotate every mention of the entity.
No Duplicates
Annotate All Documents in Trainset:
More annotations lead to better results.
You can train a model with the minimum number of documents and annotations, but adding data usually improves the model.
Provide documents that resemble real use cases as closely as possible.
No Synthetic Data
Documents should be diverse in terms of word count.
Use the same data distribution for training as you expect to see when you're actually detecting your custom entities (inference time).
PDF Annotation Requirements:
image files
PDFs
Word documents
250 input documents.
At least 100 annotations per entity.
PDF Annotation Tools:
Use SageMaker Ground Truth to create a labeled dataset in an augmented manifest file.
Ground Truth is a data labeling service that helps you (or a workforce that you employ) build training datasets for machine learning models.
Comprehend accepts augmented manifest files as training data for custom models.
You can provide these files when you create a custom entity recognizer by using the Comprehend console or the CreateEntityRecognizer API action.
You can use the Ground Truth built-in task type, Named Entity Recognition, to create a labeling job to have workers identify entities in text.
See Named Entity Recognition in the Amazon SageMaker Developer Guide.
Use SageMaker Ground Truth to create PDF annotations.
The labeled dataset is stored in an augmented manifest file.
Comprehend accepts augmented manifest files as training data for custom models. p.155
Augmented manifest files are in JSON lines format.
Using Ground Truth, you can define overlapping labels (text that you associate with more than one label).
Annotation files
/output/your labeling job name/annotations/
Augmented Manifest
- /output/your labeling job name/manifests/
Minimums: three input documents and 25 annotations per entity.
ref: https://docs.aws.amazon.com/comprehend/latest/dg/cer-entity-list.html
Comprehend uses an intelligent algorithm to detect occurrences of the entity in the documents to serve as the basis for training the custom entity recognizer model.
To train a model using an entity list, you provide two pieces of information:
- A list of the entity names with their corresponding custom entity types.
- A collection of unannotated documents in which you expect your entities to appear.
- At least 200 entity matches per entity.
- Must be in .csv format.
- This .csv file must have the columns:
- Text
- Type
Example:
If this is our test data.
"Jo Brown" is an engineer in the high tech industry.
"John Doe" has been a engineer for 14 years.
Emilio Johnson is a judge on the Washington Supreme Court.
Our latest new employee, "Jane Smith", has been a manager in the industry for 4 years.
The entity list would be:
Text, Type
Jo Brown, ENGINEER
John Doe, ENGINEER
Jane Smith, MANAGER
NOTES
The order of the entities in your list has no effect on model training.
Use entity list items that cover 80%-100% of positive entity examples mentioned in the unannotated corpus of documents.
Avoid entity examples that match non-entities in the document corpus by removing common words and phrases.
Input data should not contain duplicates.
__Provide documents that resemble real use cases as closely as possible.__
__Don't use toy data or synthesized data for production systems.__
__The input data should be as diverse as possible to avoid overfitting and to help the underlying model generalize better on real examples.__
The entity list is case sensitive, and regular expressions are not currently supported.
However, the model can often still recognize entities even if they do not exactly match the casing provided in the entity list.
If you have an entity that is a substring of another entity (such as “Smith” and “Jane Smith”), provide both in the entity list.
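Once the entity list CSV and the unannotated documents are in S3, training can be started with the CreateEntityRecognizer API. A rough boto3 sketch; the recognizer name, role ARN, and S3 URIs are placeholders, and the InputDataConfig shape should be checked against the current API reference:
import boto3

comprehend = boto3.client("comprehend")

response = comprehend.create_entity_recognizer(
    RecognizerName="engineer-manager-recognizer",  # placeholder
    DataAccessRoleArn="arn:aws:iam::123456789012:role/ComprehendDataAccessRole",  # placeholder
    LanguageCode="en",
    InputDataConfig={
        "EntityTypes": [{"Type": "ENGINEER"}, {"Type": "MANAGER"}],
        "Documents": {"S3Uri": "s3://your-s3-bucket/train/docs/"},  # placeholder
        "EntityList": {"S3Uri": "s3://your-s3-bucket/train/entity_list.csv"},  # placeholder
    },
)
print(response["EntityRecognizerArn"])
Training is asynchronous; poll describe_entity_recognizer with the returned ARN until the status reaches TRAINED.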
Precision
This indicates the fraction of entities identified by the model that are correctly labeled. Mathematically, it is defined in terms of true positives (tp) and false positives (fp), and it is calculated as:
precision = tp / (tp + fp)
For example, if a model identifies two entities and only one of them is correct, that is one true positive and one false positive, so precision = 1 / (1 + 1) = 50%.
Recall
This indicates the fraction of entities present in the documents that are correctly identified and labeled by the system. Mathematically, this is defined in terms of the total number of correct identifications, true positives (tp), and missed identifications, false negatives (fn).
It is calculated as recall = tp / (tp + fn).
For example, if a model correctly identifies one entity but misses two other instances where that entity is present, the result is one true positive and two false negatives.
In this case, recall = 1 / (1 + 2). The recall is 33.33%, as one entity is correct out of a possible three examples.
F1 score
This is a combination of the Precision and Recall metrics, which measures the overall accuracy of the model for custom entity recognition. The F1 score is the harmonic mean of the Precision and Recall metrics: F1 = 2 * Precision * Recall / (Precision + Recall).
Note
Intuitively, the harmonic mean penalizes the extremes more than the simple average or other means (example: precision = 0, recall = 1 could be achieved trivially by predicting all possible spans. Here, the simple average would be 0.5, but F1 would penalize it as 0).
With precision = 50% and recall = 33.33%, F1 = 2 * 0.5 * 0.3333 / (0.5 + 0.3333) ≈ 0.4, or about 40%.
Extract medical information from unstructured medical text like:
A HIPAA-eligible natural language processing (NLP) service that uses ML that's pre-trained to understand and extract health data from medical text.
Identify relationships among extracted health information and link to medical ontologies like:
APIs:
https://docs.aws.amazon.com/comprehend-medical/latest/dev/textanalysis-entitiesv2.html
Use the DetectEntitiesV2 operation to detect entities in single files, or StartEntitiesDetectionV2Job for batch analysis of multiple files.
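A minimal boto3 sketch of a real-time call (the text is a made-up example sentence):
import boto3

cm = boto3.client("comprehendmedical")

text = "John Smith, 48, is a teacher who lives in Seattle, Washington."
response = cm.detect_entities_v2(Text=text)

for entity in response["Entities"]:
    print(entity["Category"], entity["Type"], entity["Text"], round(entity["Score"], 3))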
You can detect entities in the following categories:
Amazon Comprehend Medical detects information in the following classes:
https://docs.aws.amazon.com/comprehend-medical/latest/dev/textanalysis-entities.html
Use the DetectEntities operation to detect the medical entities in your text. It detects entities in the following categories:
Amazon Comprehend Medical detects information in the following classes:
PHI entities are based on a list of 18 identifiers from the HIPAA Act.
Comprehend Medical detects entities associated with these identifiers, but these entities don't map 1:1 to the list specified by the Safe Harbor method.
https://docs.aws.amazon.com/comprehend-medical/latest/dev/textanalysis-phi.html
Entity | Description | HIPAA Category |
---|---|---|
AGE | All components of age, spans of age, and any age mentioned, be it patient or family member or others involved in the note. Default is in years unless otherwise noted. | 3. Dates related to an individual |
DATE | Any date related to patient or patient care. | 3. Dates related to an individual |
NAME | All names mentioned in the clinical note, typically belonging to patient, family, or provider. | 1. Name |
PHONE_OR_FAX | Any phone, fax, pager; excludes named phone numbers such as 1-800-QUIT-NOW as well as 911. | 4. Phone number 5. FAX number |
EMAIL | Any email address. | 6. Email addresses |
ID | Any sort of number associated with the identity of a patient. This includes their social security number, medical record number, facility ID, clinical trial number, certificate/license number, vehicle/device number, biometric numbers, and provider identifiers. | 7. Social Security Number 8. Medical Record number 9. Health Plan number 10. Account numbers 11. Certificate/License numbers 12. Vehicle identifiers 13. Device numbers 16. Biometric information 18. Any other identifying characteristics |
URL | Any web URL. | 14. URLs |
ADDRESS | All geographical subdivisions of an address, including facilities, named medical facilities, or wards within a facility. | 2. Geographic location |
PROFESSION | Includes any profession or employer mentioned in a note as it pertains to the patient or the patient’s family. | 18. Any other identifying characteristics |
{
"Entities": [
{
"Id": 0,
"BeginOffset": 11,
"EndOffset": 21,
"Score": 0.997368335723877,
"Text": "John Smith",
"Category": "PROTECTED_HEALTH_INFORMATION",
"Type": "NAME",
"Traits": []
},
{
"Id": 1,
"BeginOffset": 25,
"EndOffset": 27,
"Score": 0.9998362064361572,
"Text": "48",
"Category": "PROTECTED_HEALTH_INFORMATION",
"Type": "AGE",
"Traits": []
},
{
"Id": 2,
"BeginOffset": 37,
"EndOffset": 44,
"Score": 0.8661606311798096,
"Text": "teacher",
"Category": "PROTECTED_HEALTH_INFORMATION",
"Type": "PROFESSION",
"Traits": []
},
{
"Id": 3,
"BeginOffset": 61,
"EndOffset": 68,
"Score": 0.9629441499710083,
"Text": "Seattle",
"Category": "PROTECTED_HEALTH_INFORMATION",
"Type": "ADDRESS",
"Traits": []
},
{
"Id": 4,
"BeginOffset": 78,
"EndOffset": 88,
"Score": 0.38217034935951233,
"Text": "Washington",
"Category": "PROTECTED_HEALTH_INFORMATION",
"Type": "ADDRESS",
"Traits": []
}
],
"UnmappedAttributes": []
}
client.delete_message(**kwargs)
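For reference, a minimal sketch of the receive/process/delete cycle when polling the queue yourself (the queue URL is a placeholder). Note that when a Lambda is triggered by SQS and returns successfully, the service deletes the message for you:
import boto3

sqs = boto3.client("sqs")
queue_url = "https://sqs.us-east-1.amazonaws.com/123456789012/your-queue"  # placeholder

msgs = sqs.receive_message(QueueUrl=queue_url, MaxNumberOfMessages=1)
for msg in msgs.get("Messages", []):
    # ... process msg["Body"] here ...
    sqs.delete_message(QueueUrl=queue_url, ReceiptHandle=msg["ReceiptHandle"])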
{
"Service":"Amazon S3",
"Event":"s3:TestEvent",
"Time":"2021-04-27T13:57:03.224Z",
"Bucket":"amaksimov-s3-sqs-demo-bucket",
"RequestId":"MDSYJ6FFMMZ75MJ8",
"HostId":"bydBlxgzo+XD8x1szLD+YfeaN8DUtNoxEHsMDySKd1wuX1PKvuYx4h/Iw8uUM1wx/uImu1On5sI="
}
https://docs.aws.amazon.com/AmazonS3/latest/userguide/EventBridge.html
Event type | Description |
---|---|
Object Created | An object was created. The reason field in the event message structure indicates which S3 API was used to create the object: PutObject, POST Object, CopyObject, or CompleteMultipartUpload. |
Object Deleted (DeleteObject) Object Deleted (Lifecycle expiration) | An object was deleted. When an object is deleted using an S3 API call, the reason field is set to DeleteObject. When an object is deleted by an S3 Lifecycle expiration rule, the reason field is set to Lifecycle Expiration. For more information, see Expiring objects. When an unversioned object is deleted, or a versioned object is permanently deleted, the deletion-type field is set to Permanently Deleted. When a delete marker is created for a versioned object, the deletion-type field is set to Delete Marker Created. For more information, see Deleting object versions from a versioning-enabled bucket. |
Object Restore Initiated | An object restore was initiated from S3 Glacier or S3 Glacier Deep Archive storage class or from S3 Intelligent-Tiering Archive Access or Deep Archive Access tier. For more information, see Working with archived objects. |
Object Restore Completed | An object restore was completed. |
Object Restore Expired | The temporary copy of an object restored from S3 Glacier or S3 Glacier Deep Archive expired and was deleted. |
Object Storage Class Changed | An object was transitioned to a different storage class. For more information, see Transitioning objects using Amazon S3 Lifecycle. |
Object Access Tier Changed | An object was transitioned to the S3 Intelligent-Tiering Archive Access tier or Deep Archive Access tier. For more information, see Amazon S3 Intelligent-Tiering. |
Object ACL Updated | An object's access control list (ACL) was set using PutObjectACL. An event is not generated when a request results in no change to an object’s ACL. For more information, see Access control list (ACL) overview. |
Object Tags Added | A set of tags was added to an object using PutObjectTagging. For more information, see Categorizing your storage using tags. |
Object Tags Deleted | All tags were removed from an object using DeleteObjectTagging. For more information, see Categorizing your storage using tags. |
Event Types | Description |
---|---|
s3:TestEvent | When a notification is enabled, Amazon S3 publishes a test notification to ensure the topic exists and the bucket owner has permission to publish to it. If enabling the notification fails, no test notification is sent. |
s3:ObjectCreated: s3:ObjectCreated:Put s3:ObjectCreated:Post s3:ObjectCreated:Copy s3:ObjectCreated:CompleteMultipartUpload | Amazon S3 API operations like PUT, POST, and COPY can create an object. Notifications can be enabled for specific operations or use s3:ObjectCreated: for all object creation events. Failed operations do not generate notifications. s3:ObjectCreated:CompleteMultipartUpload includes objects created with UploadPartCopy for Copy operations. |
s3:ObjectRemoved: s3:ObjectRemoved:Delete s3:ObjectRemoved:DeleteMarkerCreated | Notifications can be enabled for object deletions or deletions of versioned objects. s3:ObjectRemoved:Delete notifies about permanent deletions, while s3:ObjectRemoved:DeleteMarkerCreated notifies about delete marker creations. Notifications are not sent for automatic deletions by Lifecycle policies or failed operations. |
s3:ObjectRestore: s3:ObjectRestore:Post s3:ObjectRestore:Completed s3:ObjectRestore:Delete | Notifications can be enabled for object restoration initiation (s3:ObjectRestore:Post), completion (s3:ObjectRestore:Completed), and expiration of temporary restored copies (s3:ObjectRestore:Delete). Applies to objects in S3 Glacier Flexible Retrieval and Deep Archive storage classes. |
s3:ReducedRedundancyLostObject | Notifies when an object in Reduced Redundancy Storage (RRS) is lost. |
s3:Replication: s3:Replication:OperationFailedReplication s3:Replication:OperationMissedThreshold s3:Replication:OperationReplicatedAfterThreshold s3:Replication:OperationNotTracked | Notifications can be enabled for replication failures (OperationFailedReplication), missed thresholds (OperationMissedThreshold), late replications (OperationReplicatedAfterThreshold), and untracked objects (OperationNotTracked). Useful for monitoring replication metrics and progress when using S3 Replication Time Control. |
s3:LifecycleExpiration: s3:LifecycleExpiration:Delete s3:LifecycleExpiration:DeleteMarkerCreated | Notifications can be enabled for object deletions based on Lifecycle configurations. s3:LifecycleExpiration:Delete applies to unversioned bucket deletions and permanent deletions, while s3:LifecycleExpiration:DeleteMarkerCreated applies to delete marker creations for versioned buckets. |
s3:LifecycleTransition | Notifies when an object is transitioned to another storage class based on an S3 Lifecycle configuration. |
s3:IntelligentTiering | Notifies when an object in the S3 Intelligent-Tiering storage class moves to Archive Access or Deep Archive Access tier. |
s3:ObjectTagging: s3:ObjectTagging:Put s3:ObjectTagging:Delete | Notifications can be enabled for adding (s3:ObjectTagging:Put) or deleting (s3:ObjectTagging:Delete) object tags. |
s3:ObjectAcl:Put | Notifies when an ACL is PUT on an object or modified. Notifications are not sent if the ACL remains unchanged. |
The S3 event consists of a list of records describing the object within the S3 bucket.
The most commonly used fields are:
event['Records'][*]['s3']['bucket']['name']  # bucket name where the file has been uploaded
event['Records'][*]['s3']['object']['key']  # file name and location within the S3 bucket
{
"Records":[
{
"eventVersion":"2.1",
"eventSource":"aws:s3",
"awsRegion":"us-east-1",
"eventTime":"2021-04-26T23:31:08.107Z",
"eventName":"ObjectCreated:Put",
"userIdentity":{
"principalId":"AWS:012850762433:admin"
},
"requestParameters":{
"sourceIPAddress":"108.41.58.86"
},
"responseElements":{
"x-amz-request-id":"YP7DR0F7H7R1GN1S",
"x-amz-id-2":"WYvnoGQrVxe2LfV6yr/sDsZXj/QDL0vD02WQYn9zXg3jX2iKfq83omTmcOcIiuSUk4dTmRRDrhdNNzffoi8AeSBN7RHs2ab0"
},
"s3":{
"s3SchemaVersion":"1.0",
"configurationId":"tf-s3-queue-20210426224851886600000002",
"bucket":{
"name":"amaksimov-s3-sqs-demo-bucket",
"ownerIdentity":{
"principalId":"A1W385KKD8Q319"
},
"arn":"arn:aws:s3:::amaksimov-s3-sqs-demo-bucket"
},
"object":{
"key":"4.+Beginner%27s+Guide+to+AWS+Step+functions+-+HelloWorld+Example.png",
"size":9714,
"eTag":"b21c122beffd36c0f0caabc4dbd8b16d",
"sequencer":"0060874D3FC2FA681D"
}
}
}
]
}
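For a Lambda subscribed directly to the bucket notification (no SQS in between), a minimal handler sketch that pulls out the two fields above:
import logging
from urllib.parse import unquote_plus

LOGGER = logging.getLogger()
LOGGER.setLevel(logging.INFO)

def lambda_handler(event, context):
    for record in event["Records"]:
        bucket = record["s3"]["bucket"]["name"]
        # Object keys arrive URL-encoded (spaces become "+"), so decode them
        key = unquote_plus(record["s3"]["object"]["key"])
        LOGGER.info("S3 EVENT bucket: %s key: %s", bucket, key)
    return event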
Process S3 event inside of SQS event.
def get_s3_event(event):
    '''
    Process the S3 event inside of the SQS message body.
    Extracts the S3 message from the SQS message body field for every received record.
    Skips test events.
    '''
    for record_1 in event['Records']:
        s3_event = json.loads(record_1['body'])
        # s3:TestEvent records carry no 'Records' key, so skip them
        if 'Event' in s3_event.keys() and s3_event['Event'] == 's3:TestEvent':
            continue
        for s3_rec in s3_event['Records']:
            bucket = s3_rec['s3']['bucket']['name']
            key = s3_rec['s3']['object']['key']
            LOGGER.info(f'SQS EVENT bucket: {bucket}')
            LOGGER.info(f'SQS EVENT key: {key}')
An SQS event consists of a list of records, each representing a message grabbed from the SQS queue.
The most commonly used field here is:
event['Records'][*]['body']  # contains the text body of the SQS message
{
"Records":[
{
"messageId":"581db230-9853-4be3-a1fe-72c9a5b3e4d4",
"receiptHandle":"AQEBAwV4m8sSkn5jDd1k/GBLco1znfiv+xT0KTRZdEhQE7clWhAcFlVusMR07RQsBo5ImrlIDafWwdzfX+ZqsuRQPGWE0CcsR6ga8yQTTtG6N1CpWuotJ69Ef55XILtkOMKS+7HR3Ek1oigests3bmx5eCj0QlsRR56qSpj0o1yOOLktLsUehPPTEmWmWXGGPoTc2GayxbnL6lCheolswgiMdE2u0qmbaKV6Ek3E4PyvPfzkOx8XGXIurYJCkFMGcpi0sWrus1WO+dzbm5NtOL9n8qAzjxaMyMyV+nXvy+EO1QCLu2CuX0/rhKfjoq0+txWm8tNVb27VKbwsRKrU12odmV9mbULuvKDU55CqNOMF+LZl8zdZzceegvK2wgfA8KjMmpJ5wQVWo0S8WqVpcJCKSJYhoh/XzqGde+1gQ957YR8=",
"body":"{\"Records\":[{\"eventVersion\":\"2.1\",\"eventSource\":\"aws:s3\",\"awsRegion\":\"us-east-1\",\"eventTime\":\"2021-04-26T23:25:17.884Z\",\"eventName\":\"ObjectCreated:Put\",\"userIdentity\":{\"principalId\":\"AWS:012850762433:admin\"},\"requestParameters\":{\"sourceIPAddress\":\"108.41.58.86\"},\"responseElements\":{\"x-amz-request-id\":\"74CMGJPKH3HA1G87\",\"x-amz-id-2\":\"c52dEWNgb6rNUs7MNY20ArZHLgtNFiRJIhREfnNAnlLsXHotTUvLS7InfWnkniuawxPgTlkOkTKZICwIgsbfdHDZKQvL0LcV\"},\"s3\":{\"s3SchemaVersion\":\"1.0\",\"configurationId\":\"tf-s3-queue-20210426224851886600000002\",\"bucket\":{\"name\":\"amaksimov-s3-sqs-demo-bucket\",\"ownerIdentity\":{\"principalId\":\"A1W385KKD8Q319\"},\"arn\":\"arn:aws:s3:::amaksimov-s3-sqs-demo-bucket\"},\"object\":{\"key\":\"6.+Beginner%27s+Guide+to+AWS+Step+functions+-+AWS+HelloWorld+example.png\",\"size\":458757,\"eTag\":\"e1148e80d0798b0e23502cbdae1fef58\",\"sequencer\":\"0060874BE06812C89A\"}}}]}",
"attributes":{
"ApproximateReceiveCount":"1",
"SentTimestamp":"1619479521272",
"SenderId":"AIDAJHIPRHEMV73VRJEBU",
"ApproximateFirstReceiveTimestamp":"1619479521279"
},
"messageAttributes":{
},
"md5OfBody":"7195d8d0f011fac4dc115b59d3e86797",
"eventSource":"aws:sqs",
"eventSourceARN":"arn:aws:sqs:us-east-1:012850762433:amaksimov-s3-event-notification-queue",
"awsRegion":"us-east-1"
}
]
}
https://aws.amazon.com/cloudwatch/
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/logs.html
https://docs.aws.amazon.com/code-samples/latest/catalog/python-cloudwatch-cloudwatch_basics.py.html
Logs with Python
This is great: https://dltj.org/article/python-structlog-for-aws-lambda-cloudwatch/
https://github.com/kislyuk/watchtower
https://pypi.org/project/cloudwatch/
https://www.structlog.org/en/stable/
Query with boto3
https://stackoverflow.com/questions/59240107/how-to-query-cloudwatch-logs-using-boto3-in-python
import boto3
from datetime import datetime, timedelta
import time
client = boto3.client('logs')
query = "fields @timestamp, @message | parse @message \"username: * ClinicID: * nodename: *\" as username, ClinicID, nodename | filter ClinicID = 7667 and username='simran+test@example.com'"
log_group = '/aws/lambda/NAME_OF_YOUR_LAMBDA_FUNCTION'
start_query_response = client.start_query(
logGroupName=log_group,
startTime=int((datetime.today() - timedelta(hours=5)).timestamp()),
endTime=int(datetime.now().timestamp()),
queryString=query,
)
query_id = start_query_response['queryId']
response = None
while response is None or response['status'] in ('Scheduled', 'Running'):
    print('Waiting for query to complete ...')
    time.sleep(1)
    response = client.get_query_results(queryId=query_id)
AWS Example
"""Purpose
Shows how to use the AWS SDK for Python (Boto3) with Amazon CloudWatch to create
and manage custom metrics and alarms.
"""
from datetime import datetime, timedelta
import logging
from pprint import pprint
import random
import time
import boto3
from botocore.exceptions import ClientError
logger = logging.getLogger(__name__)
class CloudWatchWrapper:
"""Encapsulates Amazon CloudWatch functions."""
def __init__(self, cloudwatch_resource):
"""
:param cloudwatch_resource: A Boto3 CloudWatch resource.
"""
self.cloudwatch_resource = cloudwatch_resource
def list_metrics(self, namespace, name, recent=False):
"""
Gets the metrics within a namespace that have the specified name.
If the metric has no dimensions, a single metric is returned.
Otherwise, metrics for all dimensions are returned.
:param namespace: The namespace of the metric.
:param name: The name of the metric.
:param recent: When True, only metrics that have been active in the last
three hours are returned.
:return: An iterator that yields the retrieved metrics.
"""
try:
kwargs = {'Namespace': namespace, 'MetricName': name}
if recent:
kwargs['RecentlyActive'] = 'PT3H' # List past 3 hours only
metric_iter = self.cloudwatch_resource.metrics.filter(**kwargs)
logger.info("Got metrics for %s.%s.", namespace, name)
except ClientError:
logger.exception("Couldn't get metrics for %s.%s.", namespace, name)
raise
else:
return metric_iter
def put_metric_data(self, namespace, name, value, unit):
"""
Sends a single data value to CloudWatch for a metric. This metric is given
a timestamp of the current UTC time.
:param namespace: The namespace of the metric.
:param name: The name of the metric.
:param value: The value of the metric.
:param unit: The unit of the metric.
"""
try:
metric = self.cloudwatch_resource.Metric(namespace, name)
metric.put_data(
Namespace=namespace,
MetricData=[{
'MetricName': name,
'Value': value,
'Unit': unit
}]
)
logger.info("Put data for metric %s.%s", namespace, name)
except ClientError:
logger.exception("Couldn't put data for metric %s.%s", namespace, name)
raise
def put_metric_data_set(self, namespace, name, timestamp, unit, data_set):
"""
Sends a set of data to CloudWatch for a metric. All of the data in the set
have the same timestamp and unit.
:param namespace: The namespace of the metric.
:param name: The name of the metric.
:param timestamp: The UTC timestamp for the metric.
:param unit: The unit of the metric.
:param data_set: The set of data to send. This set is a dictionary that
contains a list of values and a list of corresponding counts.
The value and count lists must be the same length.
"""
try:
metric = self.cloudwatch_resource.Metric(namespace, name)
metric.put_data(
Namespace=namespace,
MetricData=[{
'MetricName': name,
'Timestamp': timestamp,
'Values': data_set['values'],
'Counts': data_set['counts'],
'Unit': unit}])
logger.info("Put data set for metric %s.%s.", namespace, name)
except ClientError:
logger.exception("Couldn't put data set for metric %s.%s.", namespace, name)
raise
def get_metric_statistics(self, namespace, name, start, end, period, stat_types):
"""
Gets statistics for a metric within a specified time span. Metrics are grouped
into the specified period.
:param namespace: The namespace of the metric.
:param name: The name of the metric.
:param start: The UTC start time of the time span to retrieve.
:param end: The UTC end time of the time span to retrieve.
:param period: The period, in seconds, in which to group metrics. The period
must match the granularity of the metric, which depends on
the metric's age. For example, metrics that are older than
three hours have a one-minute granularity, so the period must
be at least 60 and must be a multiple of 60.
:param stat_types: The type of statistics to retrieve, such as average value
or maximum value.
:return: The retrieved statistics for the metric.
"""
try:
metric = self.cloudwatch_resource.Metric(namespace, name)
stats = metric.get_statistics(
StartTime=start, EndTime=end, Period=period, Statistics=stat_types)
logger.info(
"Got %s statistics for %s.", len(stats['Datapoints']), stats['Label'])
except ClientError:
logger.exception("Couldn't get statistics for %s.%s.", namespace, name)
raise
else:
return stats
def create_metric_alarm(
self, metric_namespace, metric_name, alarm_name, stat_type, period,
eval_periods, threshold, comparison_op):
"""
Creates an alarm that watches a metric.
:param metric_namespace: The namespace of the metric.
:param metric_name: The name of the metric.
:param alarm_name: The name of the alarm.
:param stat_type: The type of statistic the alarm watches.
:param period: The period in which metric data are grouped to calculate
statistics.
:param eval_periods: The number of periods that the metric must be over the
alarm threshold before the alarm is set into an alarmed
state.
:param threshold: The threshold value to compare against the metric statistic.
:param comparison_op: The comparison operation used to compare the threshold
against the metric.
:return: The newly created alarm.
"""
try:
metric = self.cloudwatch_resource.Metric(metric_namespace, metric_name)
alarm = metric.put_alarm(
AlarmName=alarm_name,
Statistic=stat_type,
Period=period,
EvaluationPeriods=eval_periods,
Threshold=threshold,
ComparisonOperator=comparison_op)
logger.info(
"Added alarm %s to track metric %s.%s.", alarm_name, metric_namespace,
metric_name)
except ClientError:
logger.exception(
"Couldn't add alarm %s to metric %s.%s", alarm_name, metric_namespace,
metric_name)
raise
else:
return alarm
def get_metric_alarms(self, metric_namespace, metric_name):
"""
Gets the alarms that are currently watching the specified metric.
:param metric_namespace: The namespace of the metric.
:param metric_name: The name of the metric.
:returns: An iterator that yields the alarms.
"""
metric = self.cloudwatch_resource.Metric(metric_namespace, metric_name)
alarm_iter = metric.alarms.all()
logger.info("Got alarms for metric %s.%s.", metric_namespace, metric_name)
return alarm_iter
def enable_alarm_actions(self, alarm_name, enable):
"""
Enables or disables actions on the specified alarm. Alarm actions can be
used to send notifications or automate responses when an alarm enters a
particular state.
:param alarm_name: The name of the alarm.
:param enable: When True, actions are enabled for the alarm. Otherwise, they
disabled.
"""
try:
alarm = self.cloudwatch_resource.Alarm(alarm_name)
if enable:
alarm.enable_actions()
else:
alarm.disable_actions()
logger.info(
"%s actions for alarm %s.", "Enabled" if enable else "Disabled",
alarm_name)
except ClientError:
logger.exception(
"Couldn't %s actions alarm %s.", "enable" if enable else "disable",
alarm_name)
raise
def delete_metric_alarms(self, metric_namespace, metric_name):
"""
Deletes all of the alarms that are currently watching the specified metric.
:param metric_namespace: The namespace of the metric.
:param metric_name: The name of the metric.
"""
try:
metric = self.cloudwatch_resource.Metric(metric_namespace, metric_name)
metric.alarms.delete()
logger.info(
"Deleted alarms for metric %s.%s.", metric_namespace, metric_name)
except ClientError:
logger.exception(
"Couldn't delete alarms for metric %s.%s.", metric_namespace,
metric_name)
raise
def usage_demo():
print('-'*88)
print("Welcome to the Amazon CloudWatch metrics and alarms demo!")
print('-'*88)
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
cw_wrapper = CloudWatchWrapper(boto3.resource('cloudwatch'))
minutes = 20
metric_namespace = 'doc-example-metric'
metric_name = 'page_views'
start = datetime.utcnow() - timedelta(minutes=minutes)
print(f"Putting data into metric {metric_namespace}.{metric_name} spanning the "
f"last {minutes} minutes.")
for offset in range(0, minutes):
stamp = start + timedelta(minutes=offset)
cw_wrapper.put_metric_data_set(
metric_namespace, metric_name, stamp, 'Count', {
'values': [
random.randint(bound, bound * 2)
for bound in range(offset + 1, offset + 11)],
'counts': [random.randint(1, offset + 1) for _ in range(10)]
})
alarm_name = 'high_page_views'
period = 60
eval_periods = 2
print(f"Creating alarm {alarm_name} for metric {metric_name}.")
alarm = cw_wrapper.create_metric_alarm(
metric_namespace, metric_name, alarm_name, 'Maximum', period, eval_periods,
100, 'GreaterThanThreshold')
print(f"Alarm ARN is {alarm.alarm_arn}.")
print(f"Current alarm state is: {alarm.state_value}.")
print(f"Sending data to trigger the alarm. This requires data over the threshold "
f"for {eval_periods} periods of {period} seconds each.")
while alarm.state_value == 'INSUFFICIENT_DATA':
print("Sending data for the metric.")
cw_wrapper.put_metric_data(
metric_namespace, metric_name, random.randint(100, 200), 'Count')
alarm.load()
print(f"Current alarm state is: {alarm.state_value}.")
if alarm.state_value == 'INSUFFICIENT_DATA':
print(f"Waiting for {period} seconds...")
time.sleep(period)
else:
print("Wait for a minute for eventual consistency of metric data.")
time.sleep(period)
if alarm.state_value == 'OK':
alarm.load()
print(f"Current alarm state is: {alarm.state_value}.")
print(f"Getting data for metric {metric_namespace}.{metric_name} during timespan "
f"of {start} to {datetime.utcnow()} (times are UTC).")
stats = cw_wrapper.get_metric_statistics(
metric_namespace, metric_name, start, datetime.utcnow(), 60,
['Average', 'Minimum', 'Maximum'])
print(f"Got {len(stats['Datapoints'])} data points for metric "
f"{metric_namespace}.{metric_name}.")
pprint(sorted(stats['Datapoints'], key=lambda x: x['Timestamp']))
print(f"Getting alarms for metric {metric_name}.")
alarms = cw_wrapper.get_metric_alarms(metric_namespace, metric_name)
for alarm in alarms:
print(f"Alarm {alarm.name} is currently in state {alarm.state_value}.")
print(f"Deleting alarms for metric {metric_name}.")
cw_wrapper.delete_metric_alarms(metric_namespace, metric_name)
print("Thanks for watching!")
print('-'*88)
if __name__ == '__main__':
usage_demo()
Usage in tests.
"""
Contains common test fixtures used to run unit tests.
"""
import sys
# This is needed so Python can find test_tools on the path.
sys.path.append('../..')
from test_tools.fixtures.common import *
"""
Purpose
Unit tests for cloudwatch_basics.py
"""
from datetime import datetime, timedelta
from unittest.mock import MagicMock
import boto3
from botocore.exceptions import ClientError
import pytest
from cloudwatch_basics import CloudWatchWrapper
@pytest.mark.parametrize('error_code', [None, 'TestException'])
def test_list_metrics(make_stubber, error_code):
cloudwatch_resource = boto3.resource('cloudwatch')
cloudwatch_stubber = make_stubber(cloudwatch_resource.meta.client)
cw_wrapper = CloudWatchWrapper(cloudwatch_resource)
namespace = 'test-namespace'
name = 'test-name'
metrics = [cloudwatch_resource.Metric(namespace, name) for _ in range(5)]
cloudwatch_stubber.stub_list_metrics(
namespace, name, metrics, recent=True, error_code=error_code)
if error_code is None:
got_metric_iter = cw_wrapper.list_metrics(namespace, name, True)
assert list(got_metric_iter) == metrics
else:
with pytest.raises(ClientError) as exc_info:
list(cw_wrapper.list_metrics(namespace, name, True))
assert exc_info.value.response['Error']['Code'] == error_code
@pytest.mark.parametrize('error_code', [None, 'TestException'])
def test_put_metric_data(make_stubber, error_code):
cloudwatch_resource = boto3.resource('cloudwatch')
cloudwatch_stubber = make_stubber(cloudwatch_resource.meta.client)
cw_wrapper = CloudWatchWrapper(cloudwatch_resource)
namespace = 'test-namespace'
name = 'test-name'
value = 66
unit = 'Terabytes'
cloudwatch_stubber.stub_put_metric_data(
namespace, name, value, unit, error_code=error_code)
if error_code is None:
cw_wrapper.put_metric_data(namespace, name, value, unit)
else:
with pytest.raises(ClientError) as exc_info:
cw_wrapper.put_metric_data(namespace, name, value, unit)
assert exc_info.value.response['Error']['Code'] == error_code
@pytest.mark.parametrize('error_code', [None, 'TestException'])
def test_put_metric_data_set(make_stubber, error_code):
cloudwatch_resource = boto3.resource('cloudwatch')
cloudwatch_stubber = make_stubber(cloudwatch_resource.meta.client)
cw_wrapper = CloudWatchWrapper(cloudwatch_resource)
namespace = 'test-namespace'
name = 'test-name'
timestamp = datetime.now()
unit = 'Milliseconds'
data_set = {
'values': [1, 2, 3, 4],
'counts': [5, 6, 7, 8]}
cloudwatch_stubber.stub_put_metric_data_set(
namespace, name, timestamp, unit, data_set, error_code=error_code)
if error_code is None:
cw_wrapper.put_metric_data_set(
namespace, name, timestamp, unit, data_set)
else:
with pytest.raises(ClientError) as exc_info:
cw_wrapper.put_metric_data_set(
namespace, name, timestamp, unit, data_set)
assert exc_info.value.response['Error']['Code'] == error_code
@pytest.mark.parametrize('error_code', [None, 'TestException'])
def test_get_metric_statistics(make_stubber, error_code):
cloudwatch_resource = boto3.resource('cloudwatch')
cloudwatch_stubber = make_stubber(cloudwatch_resource.meta.client)
cw_wrapper = CloudWatchWrapper(cloudwatch_resource)
namespace = 'test-namespace'
name = 'test-name'
start = datetime.now() - timedelta(hours=3)
end = datetime.now() - timedelta(hours=1)
period = 60
stat_type = 'Average'
stats = [1, 2, 3, 4]
cloudwatch_stubber.stub_get_metric_statistics(
namespace, name, start, end, period, stat_type, stats, error_code=error_code)
if error_code is None:
got_stats = cw_wrapper.get_metric_statistics(
namespace, name, start, end, period, [stat_type])
assert got_stats['Label'] == name
assert [stat[stat_type] for stat in got_stats['Datapoints']] == stats
else:
with pytest.raises(ClientError) as exc_info:
cw_wrapper.get_metric_statistics(
namespace, name, start, end, period, [stat_type])
assert exc_info.value.response['Error']['Code'] == error_code
@pytest.mark.parametrize('error_code', [None, 'TestException'])
def test_create_metric_alarm(make_stubber, error_code):
cloudwatch_resource = boto3.resource('cloudwatch')
cloudwatch_stubber = make_stubber(cloudwatch_resource.meta.client)
cw_wrapper = CloudWatchWrapper(cloudwatch_resource)
metric_namespace = 'test-namespace'
metric_name = 'test-name'
alarm_name = 'test-alarm'
stat_type = 'Average'
period = 60
eval_periods = 3
threshold = 66
comparison_op = 'LessThanThreshold'
cloudwatch_stubber.stub_put_metric_alarm(
metric_namespace, metric_name, alarm_name, stat_type, period, eval_periods,
threshold, comparison_op, error_code=error_code)
if error_code is None:
got_alarm = cw_wrapper.create_metric_alarm(
metric_namespace, metric_name, alarm_name, stat_type, period,
eval_periods, threshold, comparison_op)
assert got_alarm.name == alarm_name
else:
with pytest.raises(ClientError) as exc_info:
cw_wrapper.create_metric_alarm(
metric_namespace, metric_name, alarm_name, stat_type, period,
eval_periods, threshold, comparison_op)
assert exc_info.value.response['Error']['Code'] == error_code
@pytest.mark.parametrize('error_code', [None, 'TestException'])
def test_get_metric_alarms(make_stubber, error_code):
cloudwatch_resource = boto3.resource('cloudwatch')
cloudwatch_stubber = make_stubber(cloudwatch_resource.meta.client)
cw_wrapper = CloudWatchWrapper(cloudwatch_resource)
namespace = 'test-namespace'
name = 'test-name'
alarms = []
for index in range(5):
alarm = MagicMock(alarm_arn=f'arn-{index}')
alarm.name = f'alarm-{index}'
alarms.append(alarm)
cloudwatch_stubber.stub_describe_alarms_for_metric(
namespace, name, alarms, error_code=error_code)
if error_code is None:
got_alarms = cw_wrapper.get_metric_alarms(namespace, name)
assert [{a.name: a.alarm_arn} for a in got_alarms] == [
{a.name: a.alarm_arn} for a in alarms]
else:
with pytest.raises(ClientError) as exc_info:
got_alarms = cw_wrapper.get_metric_alarms(namespace, name)
list(got_alarms)
assert exc_info.value.response['Error']['Code'] == error_code
@pytest.mark.parametrize('enable,error_code', [
(True, None),
(False, None),
(True, 'TestException')])
def test_enable_alarm_actions(make_stubber, enable, error_code):
cloudwatch_resource = boto3.resource('cloudwatch')
cloudwatch_stubber = make_stubber(cloudwatch_resource.meta.client)
cw_wrapper = CloudWatchWrapper(cloudwatch_resource)
alarm_name = 'test-alarm_name'
if enable:
cloudwatch_stubber.stub_enable_alarm_actions(
alarm_name, error_code=error_code)
else:
cloudwatch_stubber.stub_disable_alarm_actions(
alarm_name, error_code=error_code)
if error_code is None:
cw_wrapper.enable_alarm_actions(alarm_name, enable)
else:
with pytest.raises(ClientError) as exc_info:
cw_wrapper.enable_alarm_actions(alarm_name, enable)
assert exc_info.value.response['Error']['Code'] == error_code
@pytest.mark.parametrize('error_code', [None, 'TestException'])
def test_delete_metric_alarms(make_stubber, error_code):
cloudwatch_resource = boto3.resource('cloudwatch')
cloudwatch_stubber = make_stubber(cloudwatch_resource.meta.client)
cw_wrapper = CloudWatchWrapper(cloudwatch_resource)
namespace = 'test-namespace'
name = 'test-name'
alarms = []
for index in range(5):
alarm = MagicMock(alarm_arn=f'arn-{index}')
alarm.name = f'alarm-{index}'
alarms.append(alarm)
cloudwatch_stubber.stub_describe_alarms_for_metric(namespace, name, alarms)
cloudwatch_stubber.stub_delete_alarms(alarms, error_code=error_code)
if error_code is None:
cw_wrapper.delete_metric_alarms(namespace, name)
else:
with pytest.raises(ClientError) as exc_info:
cw_wrapper.delete_metric_alarms(namespace, name)
assert exc_info.value.response['Error']['Code'] == error_code