Created
July 8, 2025 10:56
-
-
Save norandom/06487176c287f9f34f0dc29055a71e92 to your computer and use it in GitHub Desktop.
Upload Markdown files to RAGFlow for parsing (Collection)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash

# Check for jq and install it if not found.
#
# The distribution is detected from /etc/os-release. Both ID and ID_LIKE are
# consulted so that derivatives (which list their parent distro in ID_LIKE)
# use the same package manager as the distro they are based on.
# Exits 1 when the OS cannot be detected, is unsupported, or jq is still
# missing after the install attempt.
if ! command -v jq &> /dev/null; then
  echo "jq could not be found, attempting to install."
  if [ -f /etc/os-release ]; then
    # Source os-release in a subshell so the variables it defines
    # (NAME, VERSION, ID, ...) do not leak into this script's namespace.
    os_ids=$(. /etc/os-release && printf '%s %s' "${ID:-}" "${ID_LIKE:-}")

    jq_pkg_tool=""
    # Intentionally unquoted: ID_LIKE is a space-separated list. ID comes
    # first, so an exact distro match wins over a "like" match.
    for os_id in $os_ids; do
      case "$os_id" in
        ubuntu | debian) jq_pkg_tool="apt-get" ;;
        centos | rhel | fedora) jq_pkg_tool="yum" ;;
        arch) jq_pkg_tool="pacman" ;;
      esac
      if [ -n "$jq_pkg_tool" ]; then
        break
      fi
    done

    case "$jq_pkg_tool" in
      apt-get)
        sudo apt-get update && sudo apt-get install -y jq
        ;;
      yum)
        sudo yum install -y jq
        ;;
      pacman)
        sudo pacman -Sy --noconfirm jq
        ;;
      *)
        echo "Unsupported OS for automatic jq installation. Please install jq manually." >&2
        exit 1
        ;;
    esac
  else
    echo "Could not detect OS for automatic jq installation. Please install jq manually." >&2
    exit 1
  fi

  # Re-check instead of trusting the package manager's exit status.
  if ! command -v jq &> /dev/null; then
    echo "jq installation failed. Exiting." >&2
    exit 1
  fi
  echo "jq installed successfully."
fi
# Configuration
# RAGFLOW_BASE_URL, RAGFLOW_API_KEY and DATASET_NAME may be supplied through
# the environment so the real endpoint and secret do not have to be written
# into this file; the values below are used when they are unset or empty.
RAGFLOW_BASE_URL="${RAGFLOW_BASE_URL:-http://1.2.3.4:9380}"
RAGFLOW_API_KEY="${RAGFLOW_API_KEY:-ragflow-myAPI}"
DATASET_NAME="${DATASET_NAME:-Zscaler}" # Name of the dataset to upload to
DATASET_ID=""                           # This will be dynamically resolved
#######################################
# Look up a RAGFlow dataset ID by its exact name.
# Globals:   RAGFLOW_BASE_URL (read), RAGFLOW_API_KEY (read)
# Arguments: $1 - dataset name
# Outputs:   ID of the first dataset whose name equals $1, on stdout;
#            an empty line when nothing matches or the response carries
#            no usable "data" list
# Returns:   0 when the lookup completed (even if nothing matched),
#            non-zero when curl fails or the response is not valid JSON
#######################################
get_dataset_id() {
  local dataset_name="$1"
  # Declared separately from the assignments so that the exit status of
  # curl/jq is not masked by `local`.
  local response dataset_id

  # The list endpoint is paginated, so ask the server to filter by name
  # (-G turns --data-urlencode into a URL-encoded query string on a GET).
  # NOTE(review): relies on the `name` filter documented for RAGFlow's
  # list-datasets API; the exact-match select below still applies either way.
  response=$(curl -s -k -G "${RAGFLOW_BASE_URL}/api/v1/datasets" \
    --data-urlencode "name=${dataset_name}" \
    -H "Authorization: Bearer ${RAGFLOW_API_KEY}") || return

  # --arg passes the name as data, so quotes/backslashes in it cannot alter
  # the jq program. `[]?` tolerates error responses without a "data" array,
  # and first(...) keeps the result to a single ID.
  dataset_id=$(printf '%s' "$response" \
    | jq -r --arg name "$dataset_name" \
      'first(.data[]? | select(.name == $name) | .id) // empty') || return

  echo "$dataset_id"
}
# Resolve DATASET_ID from DATASET_NAME; abort when the lookup yields nothing,
# because every upload URL below is built from this ID.
echo "Resolving ID for dataset: $DATASET_NAME"
DATASET_ID=$(get_dataset_id "$DATASET_NAME")
if [ -z "$DATASET_ID" ]; then
  # Diagnostics go to stderr, matching the jq-installation error messages.
  echo "Error: Could not resolve ID for dataset '$DATASET_NAME'. Exiting." >&2
  exit 1
fi
echo "Resolved DATASET_ID: $DATASET_ID"
# Get the directory where this script is located
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Find all markdown files in combined_docs directory
MARKDOWN_DIR="$SCRIPT_DIR/combined_docs"
if [ ! -d "$MARKDOWN_DIR" ]; then
  echo "Error: Directory $MARKDOWN_DIR does not exist. Run combine_markdown.sh first." >&2
  exit 1
fi

# With nullglob an unmatched *.md expands to nothing instead of remaining as
# the literal pattern, so the array length is the real number of files.
shopt -s nullglob
MARKDOWN_FILES=("$MARKDOWN_DIR"/*.md)
shopt -u nullglob

TOTAL_MARKDOWNS=${#MARKDOWN_FILES[@]}
UPLOADED_COUNT=0
FAILED_COUNT=0
echo "Found $TOTAL_MARKDOWNS markdown files in $MARKDOWN_DIR."
if [ "$TOTAL_MARKDOWNS" -eq 0 ]; then
  echo "No markdown files found in $MARKDOWN_DIR"
fi

# Loop through and upload each markdown file
for markdown_file in "${MARKDOWN_FILES[@]}"; do
  # A *.md entry that is not a regular file (e.g. a directory) is skipped
  # without stopping the remaining uploads.
  if [ ! -f "$markdown_file" ]; then
    echo "Skipping $markdown_file: not a regular file" >&2
    continue
  fi

  echo "--- Processing file: $markdown_file ---"

  # Upload markdown to RAGFlow
  echo "Uploading $markdown_file to RAGFlow..."

  # curl's -F parser treats ';' and ',' in an unquoted path as separators,
  # so the path is passed double-quoted, with embedded backslashes and double
  # quotes escaped as curl requires inside a quoted filename.
  form_path=${markdown_file//\\/\\\\}
  form_path=${form_path//\"/\\\"}

  # -w appends the HTTP status on its own final line after the body.
  UPLOAD_RESPONSE=$(curl -k -s -w "\n%{http_code}" -X POST \
    "${RAGFLOW_BASE_URL}/api/v1/datasets/${DATASET_ID}/documents" \
    -H "Content-Type: multipart/form-data" \
    -H "Authorization: Bearer ${RAGFLOW_API_KEY}" \
    -F "file=@\"${form_path}\"")

  # Last line is the status code; everything before it is the body.
  HTTP_CODE=${UPLOAD_RESPONSE##*$'\n'}
  RESPONSE_BODY=${UPLOAD_RESPONSE%$'\n'*}

  # HTTP 200 alone is not treated as success: the JSON body must also carry
  # "code": 0.
  # NOTE(review): assumes RAGFlow's documented response envelope, where
  # application-level errors are reported via a non-zero "code" — confirm
  # against the deployed RAGFlow version.
  if [ "$HTTP_CODE" = "200" ] \
    && printf '%s' "$RESPONSE_BODY" | jq -e '.code == 0' > /dev/null 2>&1; then
    echo "Upload successful for $markdown_file. Response: $RESPONSE_BODY"
    UPLOADED_COUNT=$((UPLOADED_COUNT + 1))
  else
    echo "Upload failed for $markdown_file. HTTP Code: $HTTP_CODE, Response: $RESPONSE_BODY" >&2
    FAILED_COUNT=$((FAILED_COUNT + 1))
  fi
done

echo "--- Upload process complete ---"
echo "Total markdown files processed: $TOTAL_MARKDOWNS"
echo "Total markdown files uploaded: $UPLOADED_COUNT"

# Let callers (cron, CI, wrapper scripts) detect partial failure.
if [ "$FAILED_COUNT" -gt 0 ]; then
  exit 1
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment