import ray
import ray.rllib.agents.ppo as ppo
from ray import serve  # not used in this part; presumably needed further on


def train_ppo_model() -> str:
    """Train a PPO policy on CartPole-v0 for one iteration and checkpoint it.

    The trainer is built with the torch framework and ``num_workers`` set
    to 0, and the checkpoint is written under ``/tmp/rllib_checkpoint``.

    Returns:
        The checkpoint path reported by ``trainer.save``.
    """
    trainer = ppo.PPOTrainer(
        config={"framework": "torch", "num_workers": 0},
        env="CartPole-v0",
    )
    # Train for one iteration
    trainer.train()
    # Return the path that save() reports instead of a hard-coded
    # ".../checkpoint_000001/checkpoint-1" string, which silently goes stale
    # if the number of training iterations or the checkpoint layout changes.
    return trainer.save("/tmp/rllib_checkpoint")


# Runs at import time: trains the model and records where it was saved.
checkpoint_path = train_ppo_model()

# Shut Ray down once the checkpoint has been written.
ray.shutdown()