Files
component-system/lib/components_elixir/datasheet_downloader.ex
2025-09-20 11:52:43 +02:00

152 lines
4.3 KiB
Elixir

defmodule ComponentsElixir.DatasheetDownloader do
  @moduledoc """
  Downloads datasheet PDFs from HTTP(S) URLs and manages the stored files.

  Files live in the `datasheets` subdirectory of the directory configured under
  the `:uploads_dir` key of the `:components_elixir` application environment.
  """

  require Logger

  @allowed_schemes ["http", "https"]
  @fallback_name "datasheet"
  @max_name_length 50

  @doc """
  Downloads a PDF from `url` and saves it to the datasheets folder.

  The URL must use the `http` or `https` scheme and contain a host. The response
  must have status 200 and a body starting with the `%PDF` marker.

  Returns `{:ok, filename}` on success, where `filename` is the generated name of
  the file inside the datasheets folder, or `{:error, message}` with a
  human-readable message on failure. Non-binary input yields
  `{:error, "Invalid URL"}`.

  Raises `ArgumentError` if `:uploads_dir` is not configured.
  """
  @spec download_pdf_from_url(term()) :: {:ok, String.t()} | {:error, String.t()}
  def download_pdf_from_url(url) when is_binary(url) do
    # Every step returns a self-describing {:error, message}, so a failing step
    # falls straight through `with` and no `else` translation is needed.
    with {:ok, uri} <- validate_url(url),
         {:ok, filename} <- generate_filename(uri),
         {:ok, body} <- fetch_pdf(url),
         :ok <- validate_pdf_content(body),
         :ok <- save_file(filename, body) do
      {:ok, filename}
    end
  end

  def download_pdf_from_url(_), do: {:error, "Invalid URL"}

  @doc """
  Deletes a datasheet file from the datasheets folder.

  `nil` and `""` are treated as "nothing to delete" and return `:ok`.

  Returns `:ok` when the file was removed, `{:error, :invalid_filename}` when
  `filename` would resolve to a path outside the datasheets folder, or
  `{:error, reason}` with the reason reported by `File.rm/1` otherwise (for
  example `:enoent` when the file does not exist).

  Raises `ArgumentError` if `:uploads_dir` is not configured.
  """
  @spec delete_datasheet_file(nil | IO.chardata()) ::
          :ok | {:error, File.posix() | :invalid_filename}
  def delete_datasheet_file(nil), do: :ok
  def delete_datasheet_file(""), do: :ok

  def delete_datasheet_file(filename) do
    name = IO.chardata_to_string(filename)

    case safe_datasheet_path(name) do
      {:ok, file_path} ->
        remove_file(file_path, name)

      :error ->
        Logger.warning("Refusing to delete file outside the datasheets directory: #{inspect(name)}")
        {:error, :invalid_filename}
    end
  end

  # Parses the URL once and accepts it only when it is HTTP(S) and names a host.
  defp validate_url(url) do
    case URI.parse(url) do
      %URI{scheme: scheme, host: host} = uri
      when scheme in @allowed_schemes and is_binary(host) and host != "" ->
        {:ok, uri}

      %URI{scheme: scheme} when scheme in @allowed_schemes ->
        {:error, "Invalid URL: missing host."}

      _ ->
        {:error, "Invalid URL scheme. Only HTTP and HTTPS are supported."}
    end
  end

  # Builds "<os time ms>_<unique integer>_<sanitized basename>.pdf".
  defp generate_filename(%URI{path: path}) do
    name =
      case Path.basename(path || "") do
        "" -> @fallback_name
        basename -> basename |> Path.rootname() |> sanitize_filename()
      end

    # System.unique_integer/1 is unique only within the current VM instance, so
    # on its own it repeats after a restart and an existing datasheet would be
    # overwritten. The OS time separates runs; the unique integer separates
    # downloads that land in the same millisecond.
    prefix = "#{System.os_time(:millisecond)}_#{System.unique_integer([:positive])}"

    {:ok, "#{prefix}_#{name}.pdf"}
  end

  # Reduces a name to word characters and hyphens, at most @max_name_length long.
  defp sanitize_filename(name) do
    cleaned =
      name
      |> String.replace(~r/[^\w-]/, "_")
      |> String.replace(~r/_+/, "_")
      |> String.trim("_")
      |> String.slice(0, @max_name_length)
      # Truncation can cut right after an underscore; trim it again.
      |> String.trim_trailing("_")

    if cleaned == "", do: @fallback_name, else: cleaned
  end

  # Fetches the URL and returns {:ok, body} only for a 200 response.
  defp fetch_pdf(url) do
    request_opts = [
      redirect: true,
      max_redirects: 5,
      receive_timeout: 30_000,
      headers: [
        {"User-Agent", "ComponentSystem/1.0 DatasheetDownloader"}
      ]
    ]

    case Req.get(url, request_opts) do
      {:ok, %Req.Response{status: 200, body: body}} ->
        {:ok, body}

      {:ok, %Req.Response{status: status}} ->
        {:error, "HTTP error: #{status}"}

      {:error, reason} ->
        Logger.error("Failed to download PDF from #{url}: #{inspect(reason)}")
        {:error, "Download failed: #{inspect(reason)}"}
    end
  end

  # A body counts as a PDF when it is a binary starting with the "%PDF" marker.
  defp validate_pdf_content(<<"%PDF", _rest::binary>>), do: :ok

  defp validate_pdf_content(_other),
    do: {:error, "Downloaded content is not a valid PDF file"}

  # Writes the content into the datasheets directory, creating it if needed.
  defp save_file(filename, content) do
    dir = datasheets_dir()

    with :ok <- ensure_directory(dir),
         :ok <- write_file(Path.join(dir, filename), content) do
      Logger.info("Successfully saved datasheet: #{filename}")
      :ok
    end
  end

  defp ensure_directory(dir) do
    case File.mkdir_p(dir) do
      :ok ->
        :ok

      {:error, reason} ->
        Logger.error("Failed to create datasheets directory: #{inspect(reason)}")
        {:error, "Failed to create directory: #{inspect(reason)}"}
    end
  end

  defp write_file(file_path, content) do
    case File.write(file_path, content) do
      :ok ->
        :ok

      {:error, reason} ->
        Logger.error("Failed to save datasheet file: #{inspect(reason)}")
        {:error, "Failed to save file: #{inspect(reason)}"}
    end
  end

  defp remove_file(file_path, name) do
    case File.rm(file_path) do
      :ok ->
        Logger.info("Deleted datasheet file: #{name}")
        :ok

      {:error, reason} ->
        Logger.warning("Failed to delete datasheet file #{name}: #{inspect(reason)}")
        {:error, reason}
    end
  end

  # Resolves `name` inside the datasheets directory and rejects any result that
  # escapes it (e.g. via ".." segments), so a bad name cannot remove other files.
  defp safe_datasheet_path(name) do
    root = Path.expand(datasheets_dir())
    target = root |> Path.join(name) |> Path.expand()

    if String.starts_with?(target, root <> "/") do
      {:ok, target}
    else
      :error
    end
  end

  # Read at runtime so configuration changes are honoured; fetch_env!/2 raises a
  # descriptive error when the key is missing instead of failing in Path.join/2.
  defp datasheets_dir do
    :components_elixir
    |> Application.fetch_env!(:uploads_dir)
    |> Path.join("datasheets")
  end
end