initial commit
This commit is contained in:
commit
128a1d6854
5
.dockerignore
Normal file
5
.dockerignore
Normal file
@ -0,0 +1,5 @@
|
||||
bin/
|
||||
include/
|
||||
lib/
|
||||
lib64/
|
||||
pyvenv.cfg
|
6
.gitignore
vendored
Normal file
6
.gitignore
vendored
Normal file
@ -0,0 +1,6 @@
|
||||
# VENV
|
||||
bin/
|
||||
include/
|
||||
lib/
|
||||
lib64/
|
||||
pyvenv.cfg
|
11
Dockerfile
Normal file
11
Dockerfile
Normal file
@ -0,0 +1,11 @@
|
||||
FROM python:3.12-slim AS base
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY requirements.txt .
|
||||
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
COPY main.py .
|
||||
|
||||
CMD ["python", "main.py"]
|
33
README.md
Normal file
33
README.md
Normal file
@ -0,0 +1,33 @@
|
||||
# S3 Recent File Cleaner
|
||||
|
||||
Simple Python script that loops through all files at an S3 endpoint and deletes excess files based on a retention amount.
|
||||
|
||||
Retention amount: 5
|
||||
|
||||
## Development
|
||||
|
||||
Python 3.12 should be used for development and ideally a virtual environment should be used. With the source checked out run the following command to create a virtual environment.
|
||||
|
||||
```bash
|
||||
python3 -m venv ./
|
||||
```
|
||||
|
||||
The required packages can be installed by running the following command after activating the virtual environment.
|
||||
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
If new packages are installed, the requirements file can be updated by running the following command.
|
||||
|
||||
```bash
|
||||
pip freeze > requirements.txt
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
The S3 Recent File Cleaner is best run as a one-off Docker container. The image is already built, so it can be run with the following command.
|
||||
|
||||
```bash
|
||||
docker run --rm -e S3_ACCESS_KEY=your_access_key -e S3_SECRET_KEY=your_secret_key -e S3_ENDPOINT=your_endpoint s3-recent-file-cleaner
|
||||
```
|
84
main.py
Normal file
84
main.py
Normal file
@ -0,0 +1,84 @@
|
||||
import sys
|
||||
import boto3
|
||||
import os
|
||||
|
||||
DEFAULT = ''
|
||||
S3_ENDPOINT = os.getenv('S3_ENDPOINT', DEFAULT)
|
||||
S3_ACCESS_KEY = os.getenv('S3_ACCESS_KEY', DEFAULT)
|
||||
S3_SECRET_KEY = os.getenv('S3_SECRET_KEY', DEFAULT)
|
||||
DEFAULT_FILES_TO_KEEP = 5
|
||||
|
||||
def validate_env_vars():
|
||||
if S3_ENDPOINT == '':
|
||||
print('S3_ENDPOINT is not set')
|
||||
sys.exit(1)
|
||||
|
||||
if S3_ACCESS_KEY == '':
|
||||
print('S3_ACCESS_KEY is not set')
|
||||
sys.exit(1)
|
||||
|
||||
if S3_SECRET_KEY == '':
|
||||
print('S3_SECRET_KEY is not set')
|
||||
sys.exit(1)
|
||||
|
||||
def clean_files():
|
||||
print('Cleaning files...')
|
||||
|
||||
buckets = get_buckets()
|
||||
|
||||
if len(buckets) == 0:
|
||||
print('No buckets found')
|
||||
return
|
||||
|
||||
# Loop through each bucket and check how many files are in it
|
||||
for bucket in buckets:
|
||||
try:
|
||||
bucket_name = bucket['Name']
|
||||
print(f'Bucket: {bucket_name}')
|
||||
|
||||
# Get all files in the bucket
|
||||
s3 = boto3.client('s3', endpoint_url=S3_ENDPOINT, aws_access_key_id=S3_ACCESS_KEY, aws_secret_access_key=S3_SECRET_KEY)
|
||||
response = s3.list_objects_v2(Bucket=bucket_name)
|
||||
files = response['Contents']
|
||||
|
||||
# Check if the number of files is greater than the number of files to keep
|
||||
if len(files) > DEFAULT_FILES_TO_KEEP:
|
||||
# Sort the files by date
|
||||
files.sort(key=lambda x: x['LastModified'])
|
||||
|
||||
# Delete the oldest files
|
||||
for i in range(len(files) - DEFAULT_FILES_TO_KEEP):
|
||||
try:
|
||||
file = files[i]
|
||||
file_name = file['Key']
|
||||
print(f'Deleting file: {file_name}')
|
||||
s3.delete_object(Bucket=bucket_name, Key=file_name)
|
||||
print(f'File deleted: {file_name}')
|
||||
except Exception as e:
|
||||
print(f'Error deleting file: {file_name}')
|
||||
print(e)
|
||||
else:
|
||||
print('No files to delete')
|
||||
except Exception as e:
|
||||
print('Error processing bucket')
|
||||
print(e)
|
||||
|
||||
print('Files cleaned')
|
||||
|
||||
def get_buckets():
|
||||
try:
|
||||
# Get all buckets for this endpoint
|
||||
s3 = boto3.client('s3', endpoint_url=S3_ENDPOINT, aws_access_key_id=S3_ACCESS_KEY, aws_secret_access_key=S3_SECRET_KEY)
|
||||
response = s3.list_buckets()
|
||||
return response['Buckets']
|
||||
except Exception as e:
|
||||
print('Error getting buckets')
|
||||
print(e)
|
||||
sys.exit(1)
|
||||
|
||||
if __name__ == '__main__':
|
||||
# Validate the environment variables
|
||||
validate_env_vars()
|
||||
|
||||
# Clean the files
|
||||
clean_files()
|
7
requirements.txt
Normal file
7
requirements.txt
Normal file
@ -0,0 +1,7 @@
|
||||
boto3==1.35.90
|
||||
botocore==1.35.90
|
||||
jmespath==1.0.1
|
||||
python-dateutil==2.9.0.post0
|
||||
s3transfer==0.10.4
|
||||
six==1.17.0
|
||||
urllib3==2.3.0
|
Loading…
Reference in New Issue
Block a user