initial commit
This commit is contained in:
commit
128a1d6854
5
.dockerignore
Normal file
5
.dockerignore
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
bin/
|
||||||
|
include/
|
||||||
|
lib/
|
||||||
|
lib64/
|
||||||
|
pyvenv.cfg
|
6
.gitignore
vendored
Normal file
6
.gitignore
vendored
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
# VENV
|
||||||
|
bin/
|
||||||
|
include/
|
||||||
|
lib/
|
||||||
|
lib64/
|
||||||
|
pyvenv.cfg
|
11
Dockerfile
Normal file
11
Dockerfile
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
FROM python:3.12-slim AS base
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
COPY requirements.txt .
|
||||||
|
|
||||||
|
RUN pip install --no-cache-dir -r requirements.txt
|
||||||
|
|
||||||
|
COPY main.py .
|
||||||
|
|
||||||
|
CMD ["python", "main.py"]
|
33
README.md
Normal file
33
README.md
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
# S3 Recent File Cleaner
|
||||||
|
|
||||||
|
Simple Python script to loop through all files in an S3 endpoint and delete excess files based on a retention amount.
|
||||||
|
|
||||||
|
Retention amount: 5
|
||||||
|
|
||||||
|
## Development
|
||||||
|
|
||||||
|
Python 3.12 should be used for development and ideally a virtual environment should be used. With the source checked out run the following command to create a virtual environment.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python3 -m venv ./
|
||||||
|
```
|
||||||
|
|
||||||
|
The required packages can be installed by running the following command after activating the virtual environment.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install -r requirements.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
If new packages are installed, the requirements file can be updated by running the following command.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip freeze > requirements.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
The S3 Recent File Cleaner is best run as a one-off Docker image. The image is already built, so it can be run with the following command.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker run --rm -e S3_ACCESS_KEY=your_access_key -e S3_SECRET_KEY=your_secret_key -e S3_ENDPOINT=your_endpoint s3-recent-file-cleaner
|
||||||
|
```
|
84
main.py
Normal file
84
main.py
Normal file
@ -0,0 +1,84 @@
|
|||||||
|
import sys
|
||||||
|
import boto3
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Sentinel returned by os.getenv when a variable is absent; an empty
# string is treated as "not configured" by validate_env_vars().
DEFAULT = ''

# S3 connection settings — all supplied via the environment at runtime.
S3_ENDPOINT = os.getenv('S3_ENDPOINT', DEFAULT)
S3_ACCESS_KEY = os.getenv('S3_ACCESS_KEY', DEFAULT)
S3_SECRET_KEY = os.getenv('S3_SECRET_KEY', DEFAULT)

# Number of newest files retained in each bucket; older files are deleted.
DEFAULT_FILES_TO_KEEP = 5
|
||||||
|
|
||||||
|
def validate_env_vars():
    """Exit with status 1 if any required S3 environment variable is unset.

    Checks the module-level S3_ENDPOINT, S3_ACCESS_KEY and S3_SECRET_KEY
    values in that order; the first empty one is reported and the process
    exits immediately.
    """
    required = (
        ('S3_ENDPOINT', S3_ENDPOINT),
        ('S3_ACCESS_KEY', S3_ACCESS_KEY),
        ('S3_SECRET_KEY', S3_SECRET_KEY),
    )
    for name, value in required:
        if value == '':
            print(f'{name} is not set')
            sys.exit(1)
|
||||||
|
|
||||||
|
def clean_files():
    """Apply the retention policy to every bucket on the configured endpoint.

    For each bucket returned by get_buckets(), lists the objects, sorts
    them by LastModified, and deletes all but the newest
    DEFAULT_FILES_TO_KEEP. Failures on a single object or bucket are
    printed and skipped so the remaining buckets are still processed.
    """
    print('Cleaning files...')

    buckets = get_buckets()

    if len(buckets) == 0:
        print('No buckets found')
        return

    # Create the client once up front instead of once per bucket; the
    # credentials and endpoint never change between iterations.
    s3 = boto3.client(
        's3',
        endpoint_url=S3_ENDPOINT,
        aws_access_key_id=S3_ACCESS_KEY,
        aws_secret_access_key=S3_SECRET_KEY,
    )

    # Loop through each bucket and check how many files are in it
    for bucket in buckets:
        try:
            bucket_name = bucket['Name']
            print(f'Bucket: {bucket_name}')

            # Get all files in the bucket. An empty bucket's response has
            # no 'Contents' key at all, so default to [] rather than
            # raising KeyError.
            # NOTE(review): list_objects_v2 returns at most 1000 keys per
            # call; buckets larger than that would need pagination to be
            # cleaned fully.
            response = s3.list_objects_v2(Bucket=bucket_name)
            files = response.get('Contents', [])

            # Check if the number of files is greater than the number of files to keep
            if len(files) > DEFAULT_FILES_TO_KEEP:
                # Sort the files by date, oldest first
                files.sort(key=lambda x: x['LastModified'])

                # Delete the oldest files, keeping the newest
                # DEFAULT_FILES_TO_KEEP entries.
                for file in files[:len(files) - DEFAULT_FILES_TO_KEEP]:
                    # Bind file_name before the try so the except handler
                    # can always reference it (the original could hit an
                    # UnboundLocalError there).
                    file_name = file['Key']
                    try:
                        print(f'Deleting file: {file_name}')
                        s3.delete_object(Bucket=bucket_name, Key=file_name)
                        print(f'File deleted: {file_name}')
                    except Exception as e:
                        print(f'Error deleting file: {file_name}')
                        print(e)
            else:
                print('No files to delete')
        except Exception as e:
            print('Error processing bucket')
            print(e)

    print('Files cleaned')
|
||||||
|
|
||||||
|
def get_buckets():
    """Return the list of bucket dicts for the configured endpoint.

    Each entry is the raw dict from list_buckets() (it carries at least a
    'Name' key, which clean_files() reads). Exits the process with status
    1 if the endpoint cannot be reached, since nothing can proceed
    without a bucket list.
    """
    try:
        # Get all buckets for this endpoint
        s3 = boto3.client(
            's3',
            endpoint_url=S3_ENDPOINT,
            aws_access_key_id=S3_ACCESS_KEY,
            aws_secret_access_key=S3_SECRET_KEY,
        )
        response = s3.list_buckets()
        # Defensive: fall back to an empty list if the response carries
        # no 'Buckets' key, mirroring the empty-bucket handling in
        # list_objects_v2 responses.
        return response.get('Buckets', [])
    except Exception as e:
        print('Error getting buckets')
        print(e)
        sys.exit(1)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Fail fast on missing configuration before touching the endpoint.
    validate_env_vars()
    # Then apply the retention policy to every bucket.
    clean_files()
|
7
requirements.txt
Normal file
7
requirements.txt
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
boto3==1.35.90
|
||||||
|
botocore==1.35.90
|
||||||
|
jmespath==1.0.1
|
||||||
|
python-dateutil==2.9.0.post0
|
||||||
|
s3transfer==0.10.4
|
||||||
|
six==1.17.0
|
||||||
|
urllib3==2.3.0
|
Loading…
Reference in New Issue
Block a user