Commit 690a4efc authored by Matthias Weidenthaler's avatar Matthias Weidenthaler
Browse files

Merge branch 'master' into main

parents d3aa8b3a d4657831
Pipeline #6999 failed with stages
in 3 minutes and 5 seconds
# Byte-compiled / optimized / DLL files
__pycache__/
*.pyc
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
\ No newline at end of file
**astropy 需升级至 5.3**
**老写法同时兼容本地nas和云上s3,只要读写路径以s3:// 协议开头会自动识别**
如果需要读写S3时,需要传入s3的密钥和endpoint等配置,有两种方法可选
### 方法1 环境变量
设置下面三个环境变量(`S3_KEY`、`S3_SECRET`、`S3_ENDPOINT_URL`),本文档下面介绍到的所有方法都会尝试读取环境变量以获取配置
```python
s3_options = {
'key': os.getenv('S3_KEY'),
'secret': os.getenv('S3_SECRET'),
'endpoint_url': os.getenv('S3_ENDPOINT_URL')
}
```
### 方法2 每次调用方法时传入 s3_options
在第一个kwargs参数位置指定s3_options, s3_options示例:
```python
s3_options = {
"key": "minioadmin",
"secret": "minioadmin",
"endpoint_url": "http://localhost:9000"
}
```
## 本地到s3的上传与下载
### 上传
```python
from csst_fs import s3_fs
# single file,s3_options from env
s3_fs.put('requirements.txt', 's3://csst-prod/gaia/test/requirements.txt')
# single file,s3_options from function parameter
s3_fs.put('requirements.txt', 's3://csst-prod/gaia/test/requirements.txt', s3_options=s3_options)
# folder,to s3 s3://csst-prod/common
s3_fs.put('./common', 's3://csst-prod/', recursive=True)
s3_fs.put('./common', 's3://csst-prod/', s3_options=s3_options, recursive=True)
```
### 下载
```python
from csst_fs import s3_fs
# single file
s3_fs.get('s3://csst-prod/gaia/test/requirements.txt', 'requirements.txt')
s3_fs.get('s3://csst-prod/gaia/test/requirements.txt', 'requirements.txt', s3_options=s3_options)
# folder
s3_fs.get('s3://csst-prod/gaia/data', './', recursive=True)
s3_fs.get('s3://csst-prod/gaia/data', './', s3_options=s3_options, recursive=True)
# get file or folder info
s3_fs.info('s3://csst-prod/gaia/data')
s3_fs.info('s3://csst-prod/gaia/test/requirements.txt', s3_options=s3_options)
```
### Open for read/write
```python
from csst_fs import s3_fs
# open single file (s3 or local)
with s3_fs.open('s3://csst-prod/gaia/data') as file:
file.read()
with s3_fs.open('s3://csst-prod/gaia/test/requirements.txt', s3_options=s3_options, mode='w') as file:
file.write("CSST")
```
### Check if the given file path exists
```python
from csst_fs import fs
# local or on s3, depending on the given path
fs.isfile('requirements.txt')
fs.isfile('s3://csst-prod/test.txt')
fs.isfile('s3://csst-prod/test.txt', s3_options=s3_options)
```
### Delete a file from local or s3
```python
from csst_fs import fs
# local or on s3, depending on the given path
fs.delete('requirements.txt') # uses os.remove
fs.delete('test', dir_fd=1)
fs.delete('s3://csst-prod/test.txt') # uses fsspec.delete
fs.delete('s3://csst-prod/test.txt', recursive=True, maxdepth=3)
fs.delete('s3://csst-prod/test.txt', s3_options=s3_options)
```
## astropy直接读写s3的写法适配
### fits.open
#### 老写法
```python
fits.open(path)
```
usage reference:
[https://docs.astropy.org/en/stable/io/fits/api/files.html#open](https://docs.astropy.org/en/stable/io/fits/api/files.html#open)
#### 新写法
```python
from csst_fs import fsspec_fits
fsspec_fits.open("s3://csst-prod/gaia/xx.fits")
fsspec_fits.open("s3://csst-prod/gaia/xx.fits", s3_options=s3_options)
```
### fits.getheader
#### 老写法
```python
fits.getheader(filename=in_image_path, ext=1)
```
usage reference:
[https://docs.astropy.org/en/stable/io/fits/api/files.html#getheader](https://docs.astropy.org/en/stable/io/fits/api/files.html#getheader)
#### 新写法
```python
from csst_fs import fsspec_fits
fsspec_fits.getheader(filename=in_image_path, ext=1)
fsspec_fits.getheader(filename=in_image_path, ext=1, s3_options=s3_options)
```
### fits.getdata
#### 老写法
```python
fits.getdata(in_ref_flat)
fits.getdata( in_ref_shutter, ext=1)
```
usage reference:
[https://docs.astropy.org/en/stable/io/fits/api/files.html#getdata](https://docs.astropy.org/en/stable/io/fits/api/files.html#getdata)
#### 新写法
```python
from csst_fs import fsspec_fits
fsspec_fits.getdata(in_ref_flat)
fsspec_fits.getdata(in_ref_flat, s3_options=s3_options)
fsspec_fits.getdata( in_ref_shutter, ext=1)
fsspec_fits.getdata( in_ref_shutter, s3_options=s3_options, ext=1)
```
### fits.getval
#### 老写法
```python
fits.getval(filename, keyword)
fits.getval(filename, keyword, ext=1)
```
usage reference:
[https://docs.astropy.org/en/stable/io/fits/api/files.html#getval](https://docs.astropy.org/en/stable/io/fits/api/files.html#getval)
#### 新写法
```python
from csst_fs import fsspec_fits
fsspec_fits.getval(filename, keyword)
fsspec_fits.getval(filename, keyword, s3_options=s3_options)
fsspec_fits.getval(filename, keyword, ext=1)
fsspec_fits.getval(filename, keyword, s3_options=s3_options, ext=1)
```
### header.tofile
#### 老写法
```python
header.tofile(out_head_path)
```
usage reference:
[https://docs.astropy.org/en/stable/io/fits/api/headers.html#astropy.io.fits.Header.tofile](https://docs.astropy.org/en/stable/io/fits/api/headers.html#astropy.io.fits.Header.tofile)
#### 新写法
```python
from csst_fs import fsspec_header
fsspec_header.tofile(header, out_head_path)
fsspec_header.tofile(header, out_head_path, s3_options=s3_options)
```
### header.fromfile
#### 老写法
```python
header.fromfile(filename)
```
usage reference:
[https://docs.astropy.org/en/stable/io/fits/api/headers.html#astropy.io.fits.Header.fromfile](https://docs.astropy.org/en/stable/io/fits/api/headers.html#astropy.io.fits.Header.fromfile)
#### 新写法
```python
from csst_fs import fsspec_header
fsspec_header.fromfile(filename)
fsspec_header.fromfile(filename, s3_options=s3_options)
```
### HDUList.writeto
#### 老写法
```python
hdul_img.writeto(out_combined_img, overwrite=True)
```
usage reference:
[https://docs.astropy.org/en/stable/io/fits/api/hdulists.html#astropy.io.fits.HDUList.writeto](https://docs.astropy.org/en/stable/io/fits/api/hdulists.html#astropy.io.fits.HDUList.writeto)
#### 新写法
```python
from csst_fs import fsspec_HDUList
fsspec_HDUList.writeto(hdul_img, out_combined_img, overwrite=True)
fsspec_HDUList.writeto(hdul_img, out_combined_img, s3_options=s3_options, overwrite=True)
```
### HDUList.fromfile
#### 老写法
```python
hdul_img = fits.HDUList.fromfile("hdulist.fits")
```
usage reference:
[https://docs.astropy.org/en/stable/io/fits/api/hdulists.html#astropy.io.fits.HDUList.fromfile](https://docs.astropy.org/en/stable/io/fits/api/hdulists.html#astropy.io.fits.HDUList.fromfile)
#### 新写法
```python
from csst_fs import fsspec_HDUList
hdul_img = fsspec_HDUList.fromfile("hdulist.fits")
hdul_img = fsspec_HDUList.fromfile("hdulist.fits", cache=False, s3_options=s3_options)
```
### table.Table.read
#### 老写法
```python
from astropy import table
table.Table.read(out_gaia_ldac, hdu=2)
```
usage reference:
[https://docs.astropy.org/en/stable/api/astropy.table.Table.html#astropy.table.Table.read](https://docs.astropy.org/en/stable/api/astropy.table.Table.html#astropy.table.Table.read)
#### 新写法
```python
from csst_fs import fsspec_table
fsspec_table.read(out_gaia_ldac, hdu=2)
fsspec_table.read(out_gaia_ldac, s3_options=s3_options, hdu=2)
```
### table.Table.write
#### 老写法
```python
ps.write(ref, format='fits', overwrite=True)
```
usage reference:
[https://docs.astropy.org/en/stable/api/astropy.table.Table.html#astropy.table.Table.write](https://docs.astropy.org/en/stable/api/astropy.table.Table.html#astropy.table.Table.write)
#### 新写法
```python
from csst_fs import fsspec_table
fsspec_table.write(ps, ref, format='fits', overwrite=True)
fsspec_table.write(ps, ref, format='fits', s3_options=s3_options, overwrite=True)
```
from .fits import fsspec_fits
from .fits import fsspec_header
from .fits import fsspec_HDUList
from .table import fsspec_table
from . import s3_fs
from . import fs
\ No newline at end of file
import fsspec
from astropy.io import fits
from ..fsspec_fileobj import open_fileobj
from ..s3_config import load_s3_options
def writeto(fits_HDUList, out_path, *args, s3_options=None, **kwargs):
    """Write an HDU list to a local path or an S3 object.

    Args:
        fits_HDUList: Object whose ``writeto(fileobj, *args, **kwargs)`` is
            called (presumably an ``astropy.io.fits.HDUList``).
        out_path: Destination path; it is opened in ``'wb'`` mode through
            ``open_fileobj``.
        *args: Extra positional arguments forwarded to ``writeto``.
        s3_options: Options used to build the S3 filesystem. When omitted,
            they are resolved with ``load_s3_options()`` at call time.
        **kwargs: Extra keyword arguments forwarded to ``writeto``.
    """
    if s3_options is None:
        # Resolve per call: a default evaluated in the signature would be
        # frozen at import time and miss later environment changes.
        s3_options = load_s3_options()
    fileobj = open_fileobj(out_path, s3_options, mode='wb')
    try:
        fits_HDUList.writeto(fileobj, *args, **kwargs)
    finally:
        # Always release the handle, even when writing raises.
        fileobj.close()
def fromfile(filename, *args, s3_options=None, **kwargs):
    """Build an HDUList from a local path or an S3 object.

    Args:
        filename: Source path; it is opened in ``'rb'`` mode through
            ``open_fileobj``.
        *args: Extra positional arguments forwarded to
            ``fits.HDUList.fromfile``.
        s3_options: Options used to build the S3 filesystem. When omitted,
            they are resolved with ``load_s3_options()`` at call time.
        **kwargs: Extra keyword arguments forwarded to
            ``fits.HDUList.fromfile``.

    Returns:
        The value returned by ``fits.HDUList.fromfile``.
    """
    if s3_options is None:
        # Resolve per call: a default evaluated in the signature would be
        # frozen at import time and miss later environment changes.
        s3_options = load_s3_options()
    fileobj = open_fileobj(filename, s3_options, mode='rb')
    # The handle is intentionally left open here.
    # NOTE(review): the returned HDUList presumably reads from it lazily and
    # releases it when the caller closes the HDUList - confirm.
    return fits.HDUList.fromfile(fileobj, *args, **kwargs)
\ No newline at end of file
from astropy.io import fits
from ..fsspec_fileobj import open_fileobj
from ..s3_config import load_s3_options
def open(filename, *args, s3_options=None, **kwargs):
    """Open a FITS file with ``astropy.io.fits.open``.

    Args:
        filename: Path handed directly to ``fits.open``.
        *args: Extra positional arguments forwarded to ``fits.open``.
        s3_options: Passed to ``fits.open`` as ``fsspec_kwargs``. When
            omitted, resolved with ``load_s3_options()`` at call time.
        **kwargs: Extra keyword arguments forwarded to ``fits.open``.

    Returns:
        The value returned by ``fits.open``.
    """
    if s3_options is None:
        # Resolve per call: a default evaluated in the signature would be
        # frozen at import time and miss later environment changes.
        s3_options = load_s3_options()
    return fits.open(filename, *args, fsspec_kwargs=s3_options, **kwargs)
def getdata(filename, *args, s3_options=None, **kwargs):
    """Read data from a FITS file stored locally or on S3.

    Args:
        filename: Source path; it is opened in ``'rb'`` mode through
            ``open_fileobj``.
        *args: Extra positional arguments forwarded to ``fits.getdata``.
        s3_options: Options used to build the S3 filesystem. When omitted,
            they are resolved with ``load_s3_options()`` at call time.
        **kwargs: Extra keyword arguments forwarded to ``fits.getdata``.

    Returns:
        The value returned by ``fits.getdata``.
    """
    if s3_options is None:
        # Resolve per call: a default evaluated in the signature would be
        # frozen at import time and miss later environment changes.
        s3_options = load_s3_options()
    fileobj = open_fileobj(filename, s3_options, mode='rb')
    try:
        return fits.getdata(fileobj, *args, **kwargs)
    finally:
        # This function opened the handle, so it releases it once the data
        # has been read.
        fileobj.close()
def getval(filename, keyword, *args, s3_options=None, **kwargs):
    """Read one header keyword value from a FITS file (local or S3).

    Args:
        filename: Source path; it is opened in ``'rb'`` mode through
            ``open_fileobj``.
        keyword: Keyword forwarded to ``fits.getval``.
        *args: Extra positional arguments forwarded to ``fits.getval``.
        s3_options: Options used to build the S3 filesystem. When omitted,
            they are resolved with ``load_s3_options()`` at call time.
        **kwargs: Extra keyword arguments forwarded to ``fits.getval``.

    Returns:
        The value returned by ``fits.getval``.
    """
    if s3_options is None:
        # Resolve per call: a default evaluated in the signature would be
        # frozen at import time and miss later environment changes.
        s3_options = load_s3_options()
    fileobj = open_fileobj(filename, s3_options, mode='rb')
    try:
        return fits.getval(fileobj, keyword, *args, **kwargs)
    finally:
        # This function opened the handle, so it releases it after the read.
        fileobj.close()
def getheader(filename, *args, s3_options=None, **kwargs):
    """Read a header from a FITS file stored locally or on S3.

    Args:
        filename: Source path; it is opened in ``'rb'`` mode through
            ``open_fileobj``.
        *args: Extra positional arguments forwarded to ``fits.getheader``.
        s3_options: Options used to build the S3 filesystem. When omitted,
            they are resolved with ``load_s3_options()`` at call time.
        **kwargs: Extra keyword arguments forwarded to ``fits.getheader``.

    Returns:
        The value returned by ``fits.getheader``.
    """
    if s3_options is None:
        # Resolve per call: a default evaluated in the signature would be
        # frozen at import time and miss later environment changes.
        s3_options = load_s3_options()
    fileobj = open_fileobj(filename, s3_options, mode='rb')
    try:
        return fits.getheader(fileobj, *args, **kwargs)
    finally:
        # This function opened the handle, so it releases it after the read.
        fileobj.close()
import fsspec
from astropy.io import fits
from ..fsspec_fileobj import open_fileobj
from ..s3_config import load_s3_options
def tofile(fits_header, out_path, *args, s3_options=None, **kwargs):
    """Write a FITS header to a local path or an S3 object.

    Args:
        fits_header: Object whose ``tofile(fileobj, *args, **kwargs)`` is
            called (presumably an ``astropy.io.fits.Header``).
        out_path: Destination path; it is opened in ``'wb'`` mode through
            ``open_fileobj``.
        *args: Extra positional arguments forwarded to ``tofile``.
        s3_options: Options used to build the S3 filesystem. When omitted,
            they are resolved with ``load_s3_options()`` at call time.
        **kwargs: Extra keyword arguments forwarded to ``tofile``.
    """
    if s3_options is None:
        # Resolve per call: a default evaluated in the signature would be
        # frozen at import time and miss later environment changes.
        s3_options = load_s3_options()
    fileobj = open_fileobj(out_path, s3_options, mode='wb')
    try:
        fits_header.tofile(fileobj, *args, **kwargs)
    finally:
        # Always release the handle, even when writing raises.
        fileobj.close()
def fromfile(filename, *args, s3_options=None, **kwargs):
    """Read a FITS header from a local path or an S3 object.

    Args:
        filename: Source path; it is opened in ``'rb'`` mode through
            ``open_fileobj``.
        *args: Extra positional arguments forwarded to
            ``fits.Header.fromfile``.
        s3_options: Options used to build the S3 filesystem. When omitted,
            they are resolved with ``load_s3_options()`` at call time.
        **kwargs: Extra keyword arguments forwarded to
            ``fits.Header.fromfile``.

    Returns:
        The value returned by ``fits.Header.fromfile``.
    """
    if s3_options is None:
        # Resolve per call: a default evaluated in the signature would be
        # frozen at import time and miss later environment changes.
        s3_options = load_s3_options()
    fileobj = open_fileobj(filename, s3_options, mode='rb')
    try:
        return fits.Header.fromfile(fileobj, *args, **kwargs)
    finally:
        # This function opened the handle, so it releases it after the read.
        fileobj.close()
\ No newline at end of file
from .s3_fs import is_s3_path
from .s3_fs import isfile as s3_isfile
from .s3_fs import delete as s3_delete
from .local_fs import delete as local_delete
from .local_fs import isfile as local_isfile
def isfile(path: str, *args, **kwargs) -> bool:
    """Check whether ``path`` is a file, on S3 or locally.

    The backend is chosen with ``is_s3_path(path)``; all remaining arguments
    are forwarded unchanged to the selected ``isfile`` implementation.
    """
    checker = s3_isfile if is_s3_path(path) else local_isfile
    return checker(path, *args, **kwargs)
def delete(path, **kwargs):
    """Delete ``path`` from S3 or from the local file system.

    The backend is chosen with ``is_s3_path(path)``; keyword arguments are
    forwarded unchanged to the selected ``delete`` implementation. Nothing is
    returned.
    """
    remover = s3_delete if is_s3_path(path) else local_delete
    remover(path, **kwargs)
import fsspec
def open_fileobj(path: str, s3_options, mode='w'):
    """Open ``path`` on S3 or on the local file system and return the file.

    Args:
        path: Paths starting with ``s3://`` or ``s3a://`` are opened through
            an fsspec ``'s3'`` filesystem; every other path is opened with
            the builtin ``open``.
        s3_options: Keyword arguments for ``fsspec.filesystem('s3', ...)``.
            Only used for S3 paths.
        mode: File mode passed to the underlying ``open`` call.

    Returns:
        The opened file object. The caller is responsible for closing it.
    """
    # Match the URL scheme rather than the bare "s3" prefix, so local names
    # such as "s3_data/image.fits" are not sent to S3.
    if path.startswith(("s3://", "s3a://")):
        s3_fs = fsspec.filesystem('s3', **s3_options)
        return s3_fs.open(path, mode=mode)
    return open(path, mode=mode)
\ No newline at end of file
import os
def isfile(path: str, *args, **kwargs) -> bool:
    """Return whether ``path`` is an existing regular file on local disk.

    Args:
        path: Local path to check.
        *args: Accepted and ignored.
        **kwargs: Accepted and ignored (for example ``s3_options``), so the
            same call works for local and S3 paths, as the local ``delete``
            already allows.

    Returns:
        The result of ``os.path.isfile(path)``.
    """
    return os.path.isfile(path)
def delete(path: str, *, dir_fd=None, **kwargs):
    """Remove a single file from the local file system.

    Args:
        path: File to remove.
        dir_fd: Forwarded to the ``os`` removal call.
        **kwargs: Accepted and ignored.
    """
    # os.unlink is the same operation as os.remove.
    os.unlink(path, dir_fd=dir_fd)
\ No newline at end of file
import os
import json
# Fallback S3 options returned by load_s3_options() when S3_KEY is not set.
# NOTE(review): these look like local development credentials - confirm they
# are never relied on in production.
default_s3_options = dict(
    key='minioadmin',
    secret='minioadmin',
    endpoint_url='http://localhost:9000/',
)
def load_from_env():
    """Build the S3 options dict from environment variables.

    Returns:
        A dict with the keys ``key``, ``secret`` and ``endpoint_url`` read
        from ``S3_KEY``, ``S3_SECRET`` and ``S3_ENDPOINT_URL``. A variable
        that is not set yields ``None``.
    """
    option_sources = (
        ('key', 'S3_KEY'),
        ('secret', 'S3_SECRET'),
        ('endpoint_url', 'S3_ENDPOINT_URL'),
    )
    return {option: os.environ.get(variable) for option, variable in option_sources}
def load_s3_options():
    """Return the S3 options to use.

    Returns:
        The result of ``load_from_env()`` when ``S3_KEY`` is present in the
        environment, otherwise the module-level ``default_s3_options``.
    """
    return load_from_env() if 'S3_KEY' in os.environ else default_s3_options
\ No newline at end of file
import fsspec
from .s3_config import load_s3_options
from .fsspec_fileobj import open_fileobj
def put(lpath, rpath, recursive=False, callback=fsspec.callbacks.DEFAULT_CALLBACK, maxdepth=None, s3_options=None, **kwargs,):
    """Upload a local file or directory to S3.

    Args:
        lpath: Local source path.
        rpath: Remote destination path.
        recursive: Forwarded to the filesystem ``put``.
        callback: Forwarded to the filesystem ``put``.
        maxdepth: Forwarded to the filesystem ``put``.
        s3_options: Keyword arguments for ``fsspec.filesystem('s3', ...)``.
            When omitted, resolved with ``load_s3_options()`` at call time.
        **kwargs: Extra keyword arguments forwarded to the filesystem ``put``.
    """
    if s3_options is None:
        # Resolve per call: a default evaluated in the signature would be
        # frozen at import time and miss later environment changes.
        s3_options = load_s3_options()
    s3_fs = fsspec.filesystem('s3', **s3_options)
    # Pass by keyword: the async filesystem's put takes ``batch_size`` before
    # ``maxdepth``, so a positional ``maxdepth`` would land in the wrong slot.
    s3_fs.put(lpath, rpath, recursive=recursive, callback=callback, maxdepth=maxdepth, **kwargs)
def get(rpath, lpath, recursive=False, callback=fsspec.callbacks.DEFAULT_CALLBACK, maxdepth=None, s3_options=None, **kwargs,):
    """Download a file or directory from S3 to a local path.

    Args:
        rpath: Remote source path.
        lpath: Local destination path.
        recursive: Forwarded to the filesystem ``get``.
        callback: Forwarded to the filesystem ``get``.
        maxdepth: Forwarded to the filesystem ``get``.
        s3_options: Keyword arguments for ``fsspec.filesystem('s3', ...)``.
            When omitted, resolved with ``load_s3_options()`` at call time.
        **kwargs: Extra keyword arguments forwarded to the filesystem ``get``.
    """
    if s3_options is None:
        # Resolve per call: a default evaluated in the signature would be
        # frozen at import time and miss later environment changes.
        s3_options = load_s3_options()
    s3_fs = fsspec.filesystem('s3', **s3_options)
    # Keyword arguments keep this call independent of the positional order
    # of the underlying implementation.
    s3_fs.get(rpath, lpath, recursive=recursive, callback=callback, maxdepth=maxdepth, **kwargs)
def info(path, bucket=None, key=None, refresh=False, version_id=None, s3_options=None) -> dict:
    """Return the filesystem's info record for an S3 file or folder.

    Args:
        path: Remote path to inspect.
        bucket: Forwarded to the filesystem ``info``.
        key: Forwarded to the filesystem ``info``.
        refresh: Forwarded to the filesystem ``info``.
        version_id: Forwarded to the filesystem ``info``.
        s3_options: Keyword arguments for ``fsspec.filesystem('s3', ...)``.
            When omitted, resolved with ``load_s3_options()`` at call time.

    Returns:
        The value returned by the filesystem ``info`` call.
    """
    if s3_options is None:
        # Resolve per call: a default evaluated in the signature would be
        # frozen at import time and miss later environment changes.
        s3_options = load_s3_options()
    s3_fs = fsspec.filesystem('s3', **s3_options)
    return s3_fs.info(path, bucket=bucket, key=key, refresh=refresh, version_id=version_id)
def open(file: str, mode: str='r', s3_options=None):
    """Open a file through ``open_fileobj`` and return the file object.

    Args:
        file: Path to open.
        mode: File mode forwarded to ``open_fileobj``.
        s3_options: S3 options forwarded to ``open_fileobj``. When omitted,
            resolved with ``load_s3_options()`` at call time.

    Returns:
        The file object returned by ``open_fileobj``.
    """
    if s3_options is None:
        # Resolve per call: a default evaluated in the signature would be
        # frozen at import time and miss later environment changes.
        s3_options = load_s3_options()
    return open_fileobj(path=file, s3_options=s3_options, mode=mode)
def isfile(path: str, s3_options: dict=None) -> bool:
    """Return whether ``path`` is a file on S3.

    Args:
        path: Remote path to check.
        s3_options: Keyword arguments for ``fsspec.filesystem('s3', ...)``.
            When omitted, resolved with ``load_s3_options()`` at call time.

    Returns:
        The result of the filesystem ``isfile`` call.
    """
    if s3_options is None:
        # Resolve per call: a default evaluated in the signature would be
        # frozen at import time and miss later environment changes.
        s3_options = load_s3_options()
    s3_fs = fsspec.filesystem('s3', **s3_options)
    return s3_fs.isfile(path)
def delete(path: str, *, recursive=False, maxdepth=None, s3_options: dict=None, **kwargs):
    """Delete a file or folder on S3.

    Args:
        path: Remote path to delete.
        recursive: Forwarded to the filesystem ``delete``.
        maxdepth: Forwarded to the filesystem ``delete``.
        s3_options: Keyword arguments for ``fsspec.filesystem('s3', ...)``.
            When omitted, resolved with ``load_s3_options()`` at call time.
        **kwargs: Accepted and ignored.
    """
    if s3_options is None:
        # Resolve per call: a default evaluated in the signature would be
        # frozen at import time and miss later environment changes.
        s3_options = load_s3_options()
    s3_fs = fsspec.filesystem('s3', **s3_options)
    s3_fs.delete(path, recursive=recursive, maxdepth=maxdepth)
def is_s3_path(path: str) -> bool:
    """Return True when ``path`` uses the ``s3://`` or ``s3a://`` URL scheme.

    The scheme separator is part of the check, so local names that merely
    begin with the letters "s3" (for example ``s3_data/image.fits``) are
    treated as local paths.
    """
    return path.startswith(("s3://", "s3a://"))
import fsspec
from astropy import table
from ..fsspec_fileobj import open_fileobj
from ..s3_config import load_s3_options
def read(filename, *args, s3_options=None, **kwargs):
    """Read a table from a local path or an S3 object.

    Args:
        filename: Source path; it is opened in ``'rb'`` mode through
            ``open_fileobj``.
        *args: Extra positional arguments forwarded to ``table.Table.read``.
        s3_options: Options used to build the S3 filesystem. When omitted,
            they are resolved with ``load_s3_options()`` at call time.
        **kwargs: Extra keyword arguments forwarded to ``table.Table.read``.

    Returns:
        The value returned by ``table.Table.read``.
    """
    if s3_options is None:
        # Resolve per call: a default evaluated in the signature would be
        # frozen at import time and miss later environment changes.
        s3_options = load_s3_options()
    fileobj = open_fileobj(filename, s3_options, mode='rb')
    try:
        return table.Table.read(fileobj, *args, **kwargs)
    finally:
        # This function opened the handle, so it releases it after the read.
        # Closing a handle that is already closed is harmless.
        fileobj.close()
def write(table_to_write: table.Table, filename, *args, s3_options=None, **kwargs):
    """Write a table to a local path or an S3 object.

    Args:
        table_to_write: Table whose ``write(fileobj, *args, **kwargs)`` is
            called.
        filename: Destination path; it is opened in ``'wb'`` mode through
            ``open_fileobj``.
        *args: Extra positional arguments forwarded to ``write``.
        s3_options: Options used to build the S3 filesystem. When omitted,
            they are resolved with ``load_s3_options()`` at call time.
        **kwargs: Extra keyword arguments forwarded to ``write``.

    Returns:
        The value returned by ``table_to_write.write``.
    """
    if s3_options is None:
        # Resolve per call: a default evaluated in the signature would be
        # frozen at import time and miss later environment changes.
        s3_options = load_s3_options()
    fileobj = open_fileobj(filename, s3_options, mode='wb')
    try:
        return table_to_write.write(fileobj, *args, **kwargs)
    finally:
        # Close explicitly so buffered output is flushed instead of being
        # left to garbage collection.
        fileobj.close()
\ No newline at end of file
from setuptools import setup, find_packages
# Read the README up front so its file handle is closed before setup() runs.
with open('README.md', encoding='utf-8') as readme_file:
    long_description = readme_file.read()

setup(
    name='csst_fs',
    version='0.1.0',
    packages=find_packages(),
    install_requires=[
        'astropy>=5.3',
        'fsspec>=2024.5.0',
        's3fs>=2024.5.0'
    ],
    python_requires='>=3.9',
    description='csst pipeline handle file in local file system and remote s3 file system',
    long_description=long_description,
    long_description_content_type='text/markdown',
    url='http://gitee.zhejianglab.com/enterprise/csst-fs',
    author='Pan Qi',
    author_email='qipan@zhejianglab.com'
)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment