diff --git a/docs/source/examples/dataframes-example.ipynb b/docs/source/examples/dataframes-example.ipynb index 04dacaa..0c3731f 100644 --- a/docs/source/examples/dataframes-example.ipynb +++ b/docs/source/examples/dataframes-example.ipynb @@ -85,8 +85,8 @@ "\n", "items, dataframe = geodes.search_items(\n", " query={\n", - " \"spaceborne:continentsID\": {\"eq\": \"AF\"},\n", - " \"temporal:endDate\": {\"gte\": date},\n", + " \"continent_code\": {\"eq\": \"AF\"},\n", + " \"end_datetime\": {\"gte\": date},\n", " },\n", " return_df=True,\n", " get_all=False,\n", @@ -98,7 +98,7 @@ "id": "f642b56e-1b3c-4b11-b0dd-84cf15481027", "metadata": {}, "source": [ - "Let's add to our result dataframe the column `spaceborne:cloudCover` : " + "Let's add to our result dataframe the column `eo:cloud_cover` : " ] }, { @@ -112,7 +112,7 @@ "source": [ "from pygeodes.utils.formatting import format_items\n", "\n", - "dataframe = format_items(dataframe, {\"spaceborne:cloudCover\"})" + "dataframe = format_items(dataframe, {\"eo:cloud_cover\"})" ] }, { @@ -621,7 +621,7 @@ } ], "source": [ - "dataframe.explore(column=\"spaceborne:cloudCover\", cmap=\"Blues\")" + "dataframe.explore(column=\"eo:cloud_cover\", cmap=\"Blues\")" ] }, { @@ -630,7 +630,7 @@ "metadata": {}, "source": [ "### With literal data\n", - "It can also work with literal data, like `spaceborne:productLevel` : " + "It can also work with literal data, like `processing:level` : " ] }, { @@ -642,7 +642,7 @@ }, "outputs": [], "source": [ - "dataframe = format_items(dataframe, {\"spaceborne:productLevel\"})" + "dataframe = format_items(dataframe, {\"processing:level\"})" ] }, { @@ -918,7 +918,7 @@ } ], "source": [ - "dataframe.explore(column=\"spaceborne:productLevel\", cmap=\"Dark2\")" + "dataframe.explore(column=\"processing:level\", cmap=\"Dark2\")" ] }, { @@ -962,7 +962,7 @@ "source": [ "import matplotlib.pyplot as plt\n", "\n", - "dataframe.plot(column=\"spaceborne:productLevel\", legend=True)" + 
"dataframe.plot(column=\"processing:level\", legend=True)" ] }, { @@ -1003,7 +1003,7 @@ } ], "source": [ - "dataframe.plot(kind=\"hist\", column=\"spaceborne:cloudCover\", range=(0, 100))" + "dataframe.plot(kind=\"hist\", column=\"eo:cloud_cover\", range=(0, 100))" ] }, { @@ -1017,9 +1017,9 @@ ], "metadata": { "kernelspec": { - "display_name": "demo_pygeodes", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "demo_pygeodes" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -1031,7 +1031,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.4" + "version": "3.10.12" } }, "nbformat": 4, diff --git a/docs/source/examples/s3_example.ipynb b/docs/source/examples/s3_example.ipynb index ee5e4bb..da4341f 100644 --- a/docs/source/examples/s3_example.ipynb +++ b/docs/source/examples/s3_example.ipynb @@ -145,8 +145,8 @@ "items, dataframe = geodes.search_items(\n", " intersects=geometry,\n", " query={\n", - " \"spaceborne:cloudCover\": {\"lte\": 5},\n", - " \"temporal:endDate\": {\"gte\": date},\n", + " \"eo:cloud_cover\": {\"lte\": 5},\n", + " \"end_datetime\": {\"gte\": date},\n", " },\n", ")" ] diff --git a/docs/source/examples/search-and-download.ipynb b/docs/source/examples/search-and-download.ipynb index e5f5eec..832e191 100644 --- a/docs/source/examples/search-and-download.ipynb +++ b/docs/source/examples/search-and-download.ipynb @@ -105,8 +105,8 @@ "from pygeodes.utils.datetime_utils import complete_datetime_from_str\n", "\n", "query = {\n", - " \"spaceborne:tile\": {\"eq\": \"T31TCK\"},\n", - " \"temporal:endDate\": {\"gte\": complete_datetime_from_str(\"2023-01-01\")},\n", + " \"grid:code\": {\"eq\": \"T31TCK\"},\n", + " \"end_datetime\": {\"gte\": complete_datetime_from_str(\"2023-01-01\")},\n", "}\n", "items, dataframe = geodes.search_items(query=query)" ] @@ -402,7 +402,7 @@ "source": [ "from pygeodes.utils.formatting import format_items\n", "\n", - "dataframe_new = 
format_items(dataframe, {\"spaceborne:cloudCover\"})" + "dataframe_new = format_items(dataframe, {\"eo:cloud_cover\"})" ] }, { @@ -423,7 +423,7 @@ }, "outputs": [], "source": [ - "dataframe_filtered = dataframe_new[dataframe_new[\"spaceborne:cloudCover\"] < 30]" + "dataframe_filtered = dataframe_new[dataframe_new[\"eo:cloud_cover\"] < 30]" ] }, { @@ -934,9 +934,9 @@ ], "metadata": { "kernelspec": { - "display_name": "demo_finale", + "display_name": "pygeodes", "language": "python", - "name": "demo_finale" + "name": "pygeodes" }, "language_info": { "codemirror_mode": { @@ -948,7 +948,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.4" + "version": "3.11.10" } }, "nbformat": 4, diff --git a/docs/source/user_guide/cli.rst b/docs/source/user_guide/cli.rst index d88e7e2..728eed9 100644 --- a/docs/source/user_guide/cli.rst +++ b/docs/source/user_guide/cli.rst @@ -53,7 +53,7 @@ This will give you an **overview** of the results (as it's not very convenient t Searching collections --------------------- -You can also search for collections by adding the parameter :option:`-c, --collections`. It allow only one argument, with is a search term that will be searched in the description and the title of the collections. +You can also search for collections by adding the parameter :option:`-cs, --collections_search`. It allows only one argument, which is a search term that will be searched in the description and the title of the collections. For example, to search a collection which is related to the term *grd*, you can do : ..
code-block:: bash diff --git a/docs/source/user_guide/download_item.rst b/docs/source/user_guide/download_item.rst index 340994d..43a1f1d 100644 --- a/docs/source/user_guide/download_item.rst +++ b/docs/source/user_guide/download_item.rst @@ -83,10 +83,4 @@ Downloading from S3 ------------------- If you provided your S3 credentials in your conf, you can use `boto3 `__ to download items directly from the datalake. -Provided your conf contains your S3 credentials (see :doc:`configuration`), any use of ``geodes.download_item_archive`` or ``item.download_archive`` will use the S3 client instead of geodes. -If you wish to use the s3 client for other purpose (exploring buckets for example), you can use the S3 client this way : - -.. code-block:: python - - for bucket in geodes.s3_client.buckets.all(): # s3_client is already configured with your credentials - print(bucket.name) \ No newline at end of file +Provided your conf contains your S3 credentials (see :doc:`configuration`), any use of ``geodes.download_item_archive`` or ``item.download_archive`` will use the S3 client instead of geodes. \ No newline at end of file diff --git a/docs/source/user_guide/manipulating_objects.rst b/docs/source/user_guide/manipulating_objects.rst index b11bba6..2ddd063 100644 --- a/docs/source/user_guide/manipulating_objects.rst +++ b/docs/source/user_guide/manipulating_objects.rst @@ -18,14 +18,14 @@ From STAC objects to dataframes ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ To create you first dataframe from STAC objects, you can use :py:func:`pygeodes.utils.formatting.format_collections` and :py:func:`pygeodes.utils.formatting.format_items`. -For example from a list of :py:class:`pygeodes.utils.stac.Item`, if I want to create a dataframe and add the column ``spaceborne:cloudCover`` : +For example from a list of :py:class:`pygeodes.utils.stac.Item`, if I want to create a dataframe and add the column ``eo:cloud_cover`` : .. 
code-block:: python from pygeodes.utils.formatting import format_items items = [item1,item2,...] - dataframe = format_items(items,columns_to_add={"spaceborne:cloudCover"}) + dataframe = format_items(items,columns_to_add={"eo:cloud_cover"}) But if I put a dataframe instead of a list of items in ``format_items``, the columns will be added to the ones already in the dataframe. @@ -40,8 +40,8 @@ After having added the columns you want, you can filter your data using the data .. code-block:: python - dataframe = format_items(items,columns_to_add={"spaceborne:cloudCover"}) - filtered = dataframe[dataframe["spaceborne:cloudCover"] <= 10] + dataframe = format_items(items,columns_to_add={"eo:cloud_cover"}) + filtered = dataframe[dataframe["eo:cloud_cover"] <= 10] .. seealso:: @@ -60,26 +60,6 @@ Once we filtered our dataframe of items, we could want to download them, so we n for item in items: item.download_archive() -.. _serialization_of_dataframes: -Serialization of dataframes -^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -You could want to serialize a dataframe to work with it later, it's possible using :py:func:`pygeodes.utils.formatting.export_dataframe` - -.. code-block:: python - - from pygeodes.utils.formatting import export_dataframe - - export_dataframe(dataframe,"df.json") - -and you can load it later using :py:func:`pygeodes.utils.formatting.load_dataframe` : - -.. 
code-block:: python - - from pygeodes.utils.formatting import export_dataframe - - dataframe = load_dataframe("df.json") - Plotting and exploring data using dataframes ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/docs/source/user_guide/quickstart.ipynb b/docs/source/user_guide/quickstart.ipynb index 3fd99e8..08d93ae 100644 --- a/docs/source/user_guide/quickstart.ipynb +++ b/docs/source/user_guide/quickstart.ipynb @@ -19,7 +19,19 @@ "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "ename": "ImportError", + "evalue": "cannot import name 'Geodes' from 'pygeodes' (unknown location)", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[1], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mpygeodes\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m Geodes\n\u001b[1;32m 3\u001b[0m geodes \u001b[38;5;241m=\u001b[39m Geodes()\n", + "\u001b[0;31mImportError\u001b[0m: cannot import name 'Geodes' from 'pygeodes' (unknown location)" + ] + } + ], "source": [ "from pygeodes import Geodes\n", "\n", @@ -451,7 +463,7 @@ "source": [ "new_dataframe = format_collections(\n", " collections,\n", - " columns_to_add={\"summaries.dcs:satellite\", \"summaries.dcs:sensor\"},\n", + " columns_to_add={\"summaries.constellation\", \"summaries.instruments\"},\n", ")" ] }, @@ -604,7 +616,7 @@ "id": "003205c0-7707-4d1d-9a1e-0bbf6922176e", "metadata": {}, "source": [ - "We see we can use `spaceborne:absoluteOrbitID`. Let's search for example those whose orbit direction is 30972: " + "We see we can use `sat:absolute_orbit`. 
Let's search for example those whose absolute orbit is 30972: " ] }, { @@ -646,8 +658,8 @@ } ], "source": [ - "query = {\"spaceborne:absoluteOrbitID\": {\"eq\": 30972}}\n", - "items, dataframe = geodes.search_items(query=query)" + "query = {\"sat:absolute_orbit\": {\"eq\": 30972}}\n", + "items, dataframe = geodes.search_items(query=query, collections=['PEPS_S2_L1C'])" ] }, { @@ -658,29 +670,6 @@ "Again, we come out with an `items` object, and a `dataframe` object." ] }, - { - "cell_type": "code", - "execution_count": 13, - "id": "68ab9b50-6120-4eb6-85ff-fefabdb96708", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "852" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(items)" - ] - }, { "cell_type": "code", "execution_count": 14, @@ -973,7 +962,7 @@ "id": "8f3688df-a3d6-4e11-9138-1ab2c4e1e958", "metadata": {}, "source": [ - "We see we can use `spaceborne:cloudCover`. We can add it using `format_items`." + "We see we can use `eo:cloud_cover`. We can add it using `format_items`."
] }, { @@ -988,7 +977,7 @@ "from pygeodes.utils.formatting import format_items\n", "\n", "new_dataframe = format_items(\n", - " dataframe, columns_to_add={\"spaceborne:cloudCover\"}\n", + " dataframe, columns_to_add={\"eo:cloud_cover\"}\n", ")" ] }, @@ -1217,8 +1206,8 @@ "outputs": [], "source": [ "filtered = new_dataframe[\n", - " (new_dataframe[\"spaceborne:cloudCover\"] <= 40)\n", - " & (new_dataframe[\"spaceborne:cloudCover\"] >= 39)\n", + " (new_dataframe[\"eo:cloud_cover\"] <= 40)\n", + " & (new_dataframe[\"eo:cloud_cover\"] >= 39)\n", "]" ] }, @@ -2306,9 +2295,9 @@ ], "metadata": { "kernelspec": { - "display_name": "demo_pygeodes", + "display_name": "pygeodes", "language": "python", - "name": "demo_pygeodes" + "name": "pygeodes" }, "language_info": { "codemirror_mode": { @@ -2320,7 +2309,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.4" + "version": "3.11.10" } }, "nbformat": 4, diff --git a/docs/source/user_guide/search_collections.rst b/docs/source/user_guide/search_collections.rst index 9713591..8ee3f96 100644 --- a/docs/source/user_guide/search_collections.rst +++ b/docs/source/user_guide/search_collections.rst @@ -16,7 +16,7 @@ But you can also provide a query in JSON format : .. code-block:: python - query = {'id' : {'contains' : 'PEPS'}} + query = {'title' : {'contains' : 'PEPS'}} collections,dataframe = geodes.search_collections(query=query) .. seealso:: @@ -38,12 +38,6 @@ If you wish to get only the collections, you can use the parameter ``return_df=F collections = geodes.search_collections(query=query,return_df=False) -By default, it returns all the objects corresponding to your query, so it can be long (making many API calls) if your query is not really precise. You could just want a little overview of the objects, you can set the parameter ``get_all=False``, to get just the first items returned (by making just one API call). - -.. 
code-block:: python - - collections = geodes.search_collections(query=query,return_df=False,get_all=False) - .. seealso:: You can refer to the implementation of ``search_collections`` for further details (:py:meth:`Geodes.search_collections`) \ No newline at end of file diff --git a/docs/source/user_guide/search_items.rst b/docs/source/user_guide/search_items.rst index 98a4ad4..dfd9545 100644 --- a/docs/source/user_guide/search_items.rst +++ b/docs/source/user_guide/search_items.rst @@ -10,7 +10,7 @@ Then you can start making some queries, let's start by retrieving all items whos .. code-block:: python - query = {"spaceborne:absoluteOrbitID" : {"eq" : 30972}} + query = {"sat:absolute_orbit" : {"eq" : 30972}} items,dataframe = geodes.search_items(query=query) .. seealso:: diff --git a/pygeodes/cli/cli.py b/pygeodes/cli/cli.py index 4ee4bdb..685332c 100644 --- a/pygeodes/cli/cli.py +++ b/pygeodes/cli/cli.py @@ -47,7 +47,7 @@ def download(args): else: geodes = Geodes(conf=args.conf) items = geodes.search_items( - query={"accessService:endpointURL": {"contains": args.id}}, + query={"identifier": {"eq": args.id}}, quiet=True, return_df=False, get_all=False, @@ -100,7 +100,7 @@ def search(args): query_dict["id"] = {"eq" : args.id}""" if args.data_type: query_dict["dataType"] = {"eq": args.data_type} - date_arg = "temporal:endDate" # we arbitrarily chose to consider endDate as the date, as startDate and endDate are usually really close + date_arg = "end_datetime" # we arbitrarily chose to consider endDate as the date, as startDate and endDate are usually really close -> TODO if args.start_date or args.end_date: query_dict[date_arg] = {} if args.start_date: diff --git a/pygeodes/data/model.json b/pygeodes/data/model.json index 42ee99b..e0b5952 100644 --- a/pygeodes/data/model.json +++ b/pygeodes/data/model.json @@ -1,57 +1,57 @@ { - "version": "v7.0", + "version": "v8.0", "attributes": [ - "dataType (STRING)", - "date (DATE_ISO8601)", + "dataset (STRING)", + "datetime 
(DATE_ISO8601)", "links (STRING)", - "productValidity (BOOLEAN)", - "doi (STRING_ARRAY)", - "withoutGeom (BOOLEAN)", - "temporal:startDate (DATE_ISO8601)", - "temporal:endDate (DATE_ISO8601)", - "temporal:processingDate (DATE_ISO8601)", - "processing:processingMode (STRING)", - "processing:processingContext (STRING)", - "processing:processingCorrection (STRING)", - "processing:processingVersion (STRING)", - "spatial:bbox (STRING)", - "spatial:nbCols (STRING)", - "spatial:nbRows (STRING)", - "spaceborne:satelliteSensor (STRING)", - "spaceborne:satellitePlatform (STRING)", - "spaceborne:sensorMode (STRING)", - "spaceborne:productLevel (STRING)", - "spaceborne:polarization (STRING)", - "spaceborne:cycleID (INTEGER)", - "spaceborne:missionTakeId (INTEGER)", - "spaceborne:s2TakeId (STRING)", - "spaceborne:orbitID (INTEGER)", - "spaceborne:absoluteOrbitID (LONG)", - "spaceborne:orbitDirection (STRING)", - "spaceborne:productType (STRING)", - "spaceborne:parameter (STRING)", - "spaceborne:pparameter (STRING)", - "spaceborne:product (STRING)", - "spaceborne:timeresolution (STRING)", - "spaceborne:classification (STRING)", - "spaceborne:swath (STRING)", - "spaceborne:bands (STRING)", - "spaceborne:tile (STRING)", - "spaceborne:cloudCover (DOUBLE)", - "spaceborne:waterCover (DOUBLE)", - "spaceborne:percentSaturatedPixelsMax (DOUBLE)", - "spaceborne:percentNoDataPixelsMax (DOUBLE)", - "spaceborne:nbColInterpolationErrorMax (DOUBLE)", - "spaceborne:percentGroundUsefulPixels (DOUBLE)", - "spaceborne:percentUsefulPixelsMin (DOUBLE)", - "spaceborne:angle (DOUBLE)", - "spaceborne:pitch (DOUBLE)", - "spaceborne:roll (DOUBLE)", - "spaceborne:continentsID (STRING_ARRAY)", - "spaceborne:area (DOUBLE)", - "spaceborne:subSwath (STRING)", - "spaceborne:subTile (STRING)", - "spaceborne:keywords (STRING_ARRAY)", - "spaceborne:political (JSON)" + "product_validity (BOOLEAN)", + "sci:doi (STRING_ARRAY)", + "no_geometry (BOOLEAN)", + "start_datetime (DATE_ISO8601)", + "end_datetime 
(DATE_ISO8601)", + "processing:datetime (DATE_ISO8601)", + "processing:lineage (STRING)", + "processing_context (STRING)", + "processing_correction (STRING)", + "processing:version (STRING)", + "bbox (STRING)", + "nb_cols (STRING)", + "nb_rows (STRING)", + "instrument (STRING)", + "platform (STRING)", + "sar:instrument_mode (STRING)", + "processing:level (STRING)", + "sar:polarizations (STRING)", + "sat:orbit_cycle (INTEGER)", + "mission_take_id (INTEGER)", + "s2:datatake_id (STRING)", + "sat:relative_orbit (INTEGER)", + "sat:absolute_orbit (LONG)", + "sat:orbit_state (STRING)", + "product:type (STRING)", + "parameter (STRING)", + "pparameter (STRING)", + "product (STRING)", + "temporal_resolution (STRING)", + "classification (STRING)", + "swath (STRING)", + "bands (STRING)", + "grid:code (STRING)", + "eo:cloud_cover (DOUBLE)", + "water_cover (DOUBLE)", + "saturated_defective_pixel (DOUBLE)", + "nodata_pixel (DOUBLE)", + "nb_col_interpolation_error (DOUBLE)", + "ground_useful_pixel (DOUBLE)", + "min_useful_pixel (DOUBLE)", + "sensor_angle (DOUBLE)", + "sensor_pitch (DOUBLE)", + "sensor_roll (DOUBLE)", + "continent_code (STRING_ARRAY)", + "area (DOUBLE)", + "sar:beam_ids (STRING)", + "subTile (STRING)", + "keywords (STRING_ARRAY)", + "political (JSON)" ] } \ No newline at end of file diff --git a/pygeodes/geodes.py b/pygeodes/geodes.py index 73e0514..2a8bba7 100644 --- a/pygeodes/geodes.py +++ b/pygeodes/geodes.py @@ -326,7 +326,7 @@ def download_item_archive(self, item: Item, outfile: str = None): if self.s3_client is not None: download_for_profile = Download( - url=item.find("accessService:endpointURL"), destination=outfile + url=item.find("endpoint_url"), destination=outfile ) download_for_profile.start() load_profile_and_save_download(download_for_profile) diff --git a/pygeodes/utils/s3.py b/pygeodes/utils/s3.py index eb3a448..c266432 100644 --- a/pygeodes/utils/s3.py +++ b/pygeodes/utils/s3.py @@ -53,7 +53,7 @@ def download_item(client, item: Item, outfile: 
str): ) return name_for_same_file - url = item.find("accessService:endpointURL") + url = item.find("endpoint_url") # TODO: check bucket, key = get_bucket_and_key_from_url(url) logger.debug(f"using {bucket=} and {key=}") client.download_file(Bucket=bucket, Key=key, Filename=outfile) diff --git a/pygeodes/utils/stac.py b/pygeodes/utils/stac.py index f2903ea..a945a81 100644 --- a/pygeodes/utils/stac.py +++ b/pygeodes/utils/stac.py @@ -59,7 +59,7 @@ def from_dict(cls, dico: dict): return super().from_dict(correct_stac_version(dico)) def to_dict(self): - return super().to_dict(transform_hrefs=False) + return super().to_dict(transform_hrefs=False) # TODO def find(self, key: str): from pygeodes.utils.formatting import get_from_dico_path @@ -111,6 +111,10 @@ def download_archive(self, outfile: str = None): geodes = Geodes.get_last_instance() geodes.download_item_archive(item=self, outfile=outfile) + + @classmethod + def from_dict(cls, dico: dict): + return super().from_dict(correct_stac_version(dico)) def __str__(self): return self.__repr__()