diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..6d84b44
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,84 @@
+# =============================================================================
+# GoHorse Jobs - Root .gitignore
+# =============================================================================
+
+# -----------------------------------------------------------------------------
+# OS & IDE
+# -----------------------------------------------------------------------------
+.DS_Store
+Thumbs.db
+*.swp
+*.swo
+*~
+.idea/
+.vscode/
+*.code-workspace
+
+# -----------------------------------------------------------------------------
+# Environment files (keep examples)
+# -----------------------------------------------------------------------------
+.env
+.env.local
+.env.*.local
+!.env.example
+
+# -----------------------------------------------------------------------------
+# Logs
+# -----------------------------------------------------------------------------
+logs/
+*.log
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+
+# -----------------------------------------------------------------------------
+# Backend (Go)
+# -----------------------------------------------------------------------------
+backend/main
+backend/*.exe
+backend/tmp/
+backend/.air.toml
+
+# -----------------------------------------------------------------------------
+# Frontend (Next.js)
+# -----------------------------------------------------------------------------
+frontend/.next/
+frontend/out/
+frontend/node_modules/
+frontend/.pnp/
+frontend/.pnp.js
+frontend/.vercel/
+
+# -----------------------------------------------------------------------------
+# Seeder API (Node.js)
+# -----------------------------------------------------------------------------
+seeder-api/node_modules/
+seeder-api/dist/
+
+# -----------------------------------------------------------------------------
+# Job Scraper (Python)
+# -----------------------------------------------------------------------------
+job-scraper-multisite/__pycache__/
+job-scraper-multisite/*.pyc
+job-scraper-multisite/.venv/
+job-scraper-multisite/venv/
+job-scraper-multisite/output/*.csv
+!job-scraper-multisite/output/.gitkeep
+
+# -----------------------------------------------------------------------------
+# Build artifacts
+# -----------------------------------------------------------------------------
+*.exe
+*.dll
+*.so
+*.dylib
+dist/
+build/
+coverage/
+
+# -----------------------------------------------------------------------------
+# Misc
+# -----------------------------------------------------------------------------
+.cache/
+*.bak
+*.tmp
diff --git a/job-scraper-multisite/.dockerignore b/job-scraper-multisite/.dockerignore
new file mode 100644
index 0000000..090e9b3
--- /dev/null
+++ b/job-scraper-multisite/.dockerignore
@@ -0,0 +1,16 @@
+# Python scraper .dockerignore
+__pycache__/
+*.pyc
+*.pyo
+.venv/
+venv/
+# Environment files: the Dockerfile runs `COPY . .`, so keep every .env variant out of the image
+.env
+.env.*
+!.env.example
+.git/
+.gitignore
+README.md
+*.md
+output/*.csv
+.DS_Store
diff --git a/job-scraper-multisite/.gitignore b/job-scraper-multisite/.gitignore
new file mode 100644
index 0000000..08676c4
--- /dev/null
+++ b/job-scraper-multisite/.gitignore
@@ -0,0 +1,30 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+.venv/
+venv/
+ENV/
+
+# Output (keep .gitkeep)
+output/*.csv
+
+# Environment (keep examples)
+.env
+.env.*
+!.env.example
+
+# IDE
+.idea/
+.vscode/
+
+# OS
+.DS_Store
+Thumbs.db
+
+# Distribution
+dist/
+build/
+*.egg-info/
diff --git a/job-scraper-multisite/Dockerfile b/job-scraper-multisite/Dockerfile
new file mode 100644
index 0000000..00e63ec
--- /dev/null
+++ b/job-scraper-multisite/Dockerfile
@@ -0,0 +1,28 @@
+# =============================================================================
+# GoHorse Jobs Scraper - Python Dockerfile
+# =============================================================================
+
+FROM python:3.12-slim
+
+# Security: create the non-root runtime user first so later layers can be
+# written with the correct owner directly.
+RUN useradd -m -u 1001 scraper
+
+WORKDIR /app
+
+# Install dependencies
+COPY requirements.txt ./
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy source already owned by the runtime user; a follow-up `chown -R`
+# would store every copied file a second time in an extra layer.
+COPY --chown=scraper:scraper . .
+
+# Create the output directory and hand /app itself to the runtime user while
+# still root, then drop privileges.
+RUN mkdir -p /app/output && \
+    chown scraper:scraper /app /app/output
+
+USER scraper
+
+CMD ["python", "main_scraper.py"]
diff --git a/seeder-api/.dockerignore b/seeder-api/.dockerignore
new file mode 100644
index 0000000..d76f2b9
--- /dev/null
+++ b/seeder-api/.dockerignore
@@ -0,0 +1,11 @@
+# Seeder API .dockerignore
+node_modules/
+.env
+.env.*
+!.env.example
+.git/
+.gitignore
+README.md
+*.md
+.DS_Store
+*.log
diff --git a/seeder-api/.gitignore b/seeder-api/.gitignore
new file mode 100644
index 0000000..23a4e98
--- /dev/null
+++ b/seeder-api/.gitignore
@@ -0,0 +1,20 @@
+# Node.js
+node_modules/
+.env
+.env.*
+!.env.example
+
+# Logs
+*.log
+npm-debug.log*
+
+# OS
+.DS_Store
+Thumbs.db
+
+# IDE
+.idea/
+.vscode/
+
+# Git
+.git/
diff --git a/seeder-api/Dockerfile b/seeder-api/Dockerfile
new file mode 100644
index 0000000..55ba806
--- /dev/null
+++ b/seeder-api/Dockerfile
@@ -0,0 +1,35 @@
+# =============================================================================
+# GoHorse Jobs Seeder API - Production Dockerfile
+# =============================================================================
+
+FROM node:20-alpine
+
+WORKDIR /app
+
+# Install production dependencies only. `--omit=dev` is the current spelling
+# of the deprecated `--only=production` flag.
+COPY package*.json ./
+RUN npm ci --omit=dev && npm cache clean --force
+
+# Copy source
+COPY src/ ./src/
+
+# Security: Run as non-root
+RUN addgroup -g 1001 -S nodejs && \
+    adduser -u 1001 -S seeder -G nodejs && \
+    chown -R seeder:nodejs /app
+
+USER seeder
+
+# Environment
+ENV NODE_ENV=production \
+    PORT=3001
+
+EXPOSE 3001
+
+# Health check. Probe 127.0.0.1 rather than `localhost`, which may resolve to
+# ::1 first and fail if the server only listens on IPv4.
+HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
+    CMD wget -qO- http://127.0.0.1:3001/health || exit 1
+
+CMD ["node", "src/index.js"]